[
  {
    "path": ".gitignore",
    "content": "Makefile.in\n/aclocal.m4\n/config.guess\n/config.h.in\n/config.log\n/config.sub\n/config.status\n/configure\n/depcomp\n/install-sh\n/ltmain.sh\n/missing\n/ar-lib\n/autom4te.cache\n/compile\n/libtool\n/stamp-*\n*.o\n*.a\n*.so\n*.la\n*.tar*\ndoc/adept_*.log\ndoc/adept_*.toc\ndoc/adept_*.aux\ndoc/adept_*.out\n.deps\n*~\nMakefile\n!test/Makefile\n!doc/Makefile\ninclude/adept_source.h\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: cpp\nos: linux\nsudo: required\ndist: trusty\ncompiler:\n  - gcc\nbefore_install:\n  - sudo apt-get install gfortran -y\n  - type gfortran\ninstall: autoreconf -i && ./configure && make -j8 \nscript: \n  - make check -j8\n  - cat test/test_results.txt\n"
  },
  {
    "path": "AUTHORS",
    "content": "Robin Hogan <r.j.hogan@ecmwf.int>"
  },
  {
    "path": "COPYING",
    "content": "\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "ChangeLog",
    "content": "version 2.1.4 (in progress)\n\t- Added support for the copysign function\n\t- Added aArray::set_gradient(Array) function\n\nversion 2.1.3 (22 Feb 2024)\n\t- Added interp2d and interp3d interpolation functions\n\t- Added option of nearest-neighbour interpolation\n\nversion 2.1.2 (3 Oct 2023)\n\t- Further bug fixes to reduction of active arrays which did not\n\thave adequate space allocated by check_space, including \"product\"\n\twhich requires an additional differential operation per element\n\t- Fixed out-of-bounds access in test_thread_safe_arrays\n\t- Slight change to reduce_dimension to avoid incorrect warning\n\tabout ExpressionSize array subscript of -1\n\t- Fixed broken benchmark/autodiff_benchmark to work with ADOL-C\n\t- Changed COMPILE_FLAGS argument order in test/Makefile in case\n\tCPPFLAGS contains Timer.h or other conflicting header file\n\t- Added benchmark/math_benchmark program\n\nversion 2.1.1 (10 April 2022)\n\t- interp function can perform 1D interpolation of higher\n\tdimensional Y arrays\n\t- Bug fix in reduction of an \"n\" dimensional active array to an\n\t\"n-1\" dimensional array: check_space had been forgotten\n\t- Added Newton-Levenberg[-Marquardt] options to test_minimizer,\n\twhich use the exact Hessian of the Rosenbrock banana function\n\nversion 2.1 (5 February 2021)\n\t- Removed README in favour of README.md\n\nversion 2.0.9 (28 January 2021)\n\t- Fix bug in Array::alignment_offset causing occasional\n\tcrashes in reduce and assign operations due to unaligned AVX access,\n\tnow tested in test_packet_operations\n\t- Added Conjugate-Gradient and L-BFGS minimization methods, both\n\tbounded and unbounded methods\n\t- Disabled vectorization on 32-bit ARM NEON targets as there are\n\tinsufficient floating-point intrinsics\n\t- Fixed interp(x,y,xi) function in case x and y have 0 or 1\n\telements\n\nversion 2.0.8 (22 August 2020)\n\t- Added adept_optimize.h header file providing minimization\n\tcapability, initially with the 
constrained and unconstrained\n\tLevenberg-Marquardt minimization algorithm\n\t- Test program test_minimizer tests with the N-dimensional\n\tRosenbrock function\n\t- The Stack member function \"jacobian\" can now operate on or\n\treturn Adept matrices, rather than solely on raw pointers which\n\thad to point to data in column-major order\n\t- Removed \"using namespace internal\" from several header files so\n\tthat adept namespace is clean\n\t- Fixed C++98 compatibility\n\nversion 2.0.7 (23 June 2020)\n\t- Added fast, vectorizable exponential function \"fastexp\", or can\n\tuse as adept::exp if the ADEPT_FAST_EXPONENTIAL preprocessor\n\tvariable is defined\n\t- Moved all the vector intrinsic stuff to quick_e.h\n\t- Added ARM-NEON support to quick_e.h\n\t- Adept is now thread safe on Mac OS versions that support the\n\tthread_local keyword\n\t- Fixed bug that caused incorrect differentiation of\n\tActive<double>/int\n\t- Preprocessor option ADEPT_INIT_REAL_SNAN and\n\tADEPT_INIT_REAL_ZERO initialize real numbers (and complex numbers)\n\tto signaling NaN or zero, useful for debugging\n\t- Fixed bug that caused incorrect result of maxval and minval\n\tapplied to active arrays\n\t- Fixed bug that caused incorrect differentiation of \"product\"\n\tfunction\n\t- Fixed bug that caused incorrect norm2 for passive vector large\n\tenough to use vectorization\n\nversion 2.0.6 (20 February 2020)\n\t- Fixed bug in hand-coded adjoint of Toon advection scheme\n\t(benchmark/advection_schemes_AD.h), as well as other bugs that\n\twould have prevented the Adjoint and hand-coded adjoints from\n\tbeing correct compared to each other\n\t- Fixed memory leak in Packet.h by ensuring memory is freed in the\n\tcase that neither _POSIX_VERSION nor _MSC_VER are defined\n\t- Fixed bug in FixedArray.h that prevented active fixed arrays\n\tfrom registering themselves with the stack when initialized using\n\tan initializer list\n\t- Fixed missing \"template\" directives in UnaryOperation.h 
that\n\tprevented isfinite, isnan and isinf from working correctly on\n\tarrays\n\t- Added Array::resize_contigous functions\n\t- minval and maxval now work correctly with negative and +/-Inf\n\targuments; previously minval gave incorrect results even for\n\tnegative arguments\n\t- Added array_fortran.h to provide the ability to exchange arrays\n\tbetween C++/Adept and Fortran, for those Fortran compilers that\n\tsupport the 2018 standard\n\t- Added support for AVX512 vectorization: operations on 16 floats\n\tand 8 doubles at a time;\n\t- Added test_packet_operations to check Intel vector intrinsics\n\tcorrectly implemented\n\nversion 2.0.5 (6 February 2018)\n\t- Use set_array_print_style(x) to set behaviour of <<Array;\n\tavailable are x=PRINT_STYLE_[PLAIN|CSV|CURLY|MATLAB]\n\t- Fix use of _mm_undefined_ps intrinsic: only use on GCC>=4.9.1\n\tand Clang if appropriate built-in is present; can't guarantee its\n\tpresence with other compilers\n\t- Fix writing of active scalar expressions to a stream\n\t- Added missing fmin/fmax(Expr,Scalar)\n\nversion 2.0.4 (8 January 2018)\n\t- Packet.h copes with undefined _mm_undefined_ps in GCC<4.9.1\n\t- Fix Packet.h in case SSE2 not enabled\n\t- ADEPT_FAST preprocessor variable enables\n\tADEPT_NO_DIMENSION_CHECKING, ADEPT_NO_ALIAS_CHECKING and\n\tADEPT_STACK_THREAD_UNSAFE\n\t- Divide by scalar now only converts to multiply by (1.0/scalar)\n\tif scalar is of floating-point type; this fixes indexing with\n\t\"end/2\"\n\t- Fix bug in Packet.h (found by valgrind) to ensure new[] followed\n\tby delete[] and posix_memalign followed by free\n\t- Increase initial stack size from 1000 to 1024^2\n\t- Fixed two bugs in IndexedArray.h that broke indexing a matrix\n\twith Matrix(int,intVector)\n\t- Allocated memory in non-OpenMP jacobian_forward is now freed\n\nversion 2.0.3 (28 October 2017)\n\t- Replaced template class \"cast\" with \"expr_cast\" to avoid clash\n\twith Expression's non-template member function; this 
enables\n\tcompilation with Visual C++.\n\t- Added adept::have_matrix_multiplication() and\n\tadept::have_linear_algebra() to test for BLAS and LAPACK\n\t(respectively) at run-time\n\nversion 2.0.2 (21 October 2017)\n\t- Fixed standards-compliance problem with use of Expression in\n\tCuriously Recurring Template Pattern, by removing any \"static\n\tconst\" members that referred to the derived class.  This enabled\n\tthe same code to work with g++, clang++ and the Intel compiler icc.\n\nversion 2.0.1 (18 October 2017)\n\t- Basic passive complex arrays work, tested with\n\ttest/test_complex_arrays\n\t- Added ADEPT_NO_DIMENSION_CHECKING option\n\t- Vectorized sqrt, unary-, unary+, max and min\n\t- Removed the option to vectorize with Packet representing a\n\t*pair* of SSE2/AVX packed vector; now a Packet can only represent\n\ta single packed vector. This simplifies maintenance of Packet.h,\n\tand the pair option offered no performance advantage anyway.\n\t- Vectorized reduce operations sum, product etc.\n\t- Many fixes to enable compilation with clang++\n\t- Fixed FixedArray::operator[] for rank>1\n\nversion 2.0 (September 2017)\n\t- Finalized version for release\n\t- PDF documentation is no longer installed, so that Git users are\n\tnot obliged to have pdflatex\n\nversion 1.9.11 (30 September 2017)\n\t- Fixed get_gradient member function of Array and FixedArray\n\t- Added test_array_derivatives test program\n\t- Fixed indexing of FixedArrays of rank>1\n\t- Fixed IndexedArray applied to FixedArrays (before had reference\n\tto temporary dimension object\n\t- Test and benchmarking programs now work with single precision\n\t- Stack functions accept Index passed by value rather than\n\treference, so that \"static const int\" passed from FixedArray does\n\tnot need to be explicitly instantiated\n\t- Active::add_derivative_dependence and\n\tappend_derivative_dependence no longer only accept arguments of\n\ttype \"Real\"\n\t- ADEPT_STORAGE_THREAD_SAFE option to protect 
Storage reference\n\tcounter in multi-threaded environment (C++11 only)\n\t- Added Array::soft_link() as another means to get thread safety\n\t- Added test program test_thread_safe_arrays\n\t- Added adept_reference latex file to doc directory\n\t- Added \"dimensions\" function for creating ExpressionSize objects\n\nversion 1.9.10 (25 September 2017)\n\t- Added link syntax A >>= B\n\t- Added assignment and initialization from initializer_lists for\n\tArray and FixedArray classes\n\t- Implemented Fortran-like \"count\" reduction function\n\t- Bug fix sending active expression to a stream with \"<<\"\n\t- Added \"spread<dim>(array,n)\" to match Fortran spread(array,dim,n)\n\t- Added outer_product(x,y)\n\t- Fixed adept_source.h for non-Unix systems\n\t- Moved mathematical functions from global to adept namespace\n\t- Fixed pausable recording and added test_adept_active_pausable\n\t- Removed unsafe ADEPT_COPY_CONSTRUCTOR_ONLY_ON_RETURN_FROM_FUNCTION\n\t- C++98 and C++11 correctly take cmath functions from :: and std::\n\trespectively\n\t- \"make check\" now runs test script test/run_tests.sh\n\t- inv and solve now take general expression arguments\n\t- Enabled indexed arrays to be assigned to an initializer list\n\t- BLAS now optional (without it matrix multiplication causes\n\trun-time exception)\n\t- Added test_derivatives to test quality of derivatives for all\n\tmathematical functions\n\t- Enabled SpecialMatrix and IndexedArray to be assigned to an\n\tactive scalar expression\n\t- Added fmax and fmin functions (even if C++11 not used)\n\t- Added atan2 support\n\t- C++11 on non-Mac platforms uses thread_local keyword instead of\n\tC++98 compiler extensions\n\t- Matrix multiplication on active special matrices implemented by\n\tcopying them to a dense Array<2,Real,true>. 
Very inefficient, but\n\tit works.\n\t- Matrix multiplication on inactive triangular and \"square\"\n\tmatrices now works by converting them to a dense Array<2,Real,false>.\n\t- Added alias detection in IndexedArray\n\t- Alias detection in IndexedArray and SpecialMatrix can be\n\tdeactivated with ADEPT_NO_ALIAS_CHECKING\n\t- Added \"eval\" function to evaluate an expression that might be\n\tsubject to aliasing\n\nversion 1.9.9 (August 2017)\n\t- Put on GitHub as rjhogan/Adept-2\n\t- Added Expression::next_value_contiguous for faster inner loops\n\tin the case that all expressions have a contiguous and increasing\n\tinner dimension\n\t- Preliminary vectorization via Packet class and\n\tExpression::next_packet\n\t- Vectorized forward Jacobian calculation using packets\n\t- Split Expression.h into also UnaryOperation.h and BinaryOperation.h\n\t- Fixed bug in matmul.h that causes failure if matrix in\n\tmatrix-vector multiplication is strided in both dimensions\n\t- Added move semantics if C++11 enabled\n\nversion 1.9.8 (April 2016):\n\t- Completed FixedArray.h and tested for active arguments\n\t- Added array_shortcuts for FixedArrays: (a)VectorX, (a)MatrixXX\n\t- Added array_shortcuts for Arrays: (a)ArrayXD (for X = 3 to 7)\n\t- interp permits general Expression arguments\n\nversion 1.9.7 (April 2016):\n\t- Nearly completed FixedArray.h\n\nversion 1.9.6 (March 2016):\n\t- Started FixedArray.h\n\nversion 1.9.5 (March 2016):\n\t- Fixed add_derivative_dependence and append_derivative_dependence\n\twhen applied to elements of arrays\n\t- Added ADEPT_BOUNDS_CHECKING capability, and fixed IndexedArray\n\tto work with this\n\t- Now call BLAS and LAPACK (Fortran) routines, rather than C-BLAS\n\tand LAPACKE functions\n\t- Added matrix multiplication benchmark program\n\t- Added IndexedArray for dimensions up to 7\n\t- Added Array::data() and Array::const_data() for direct access\n\t- Added Array::subset(); slightly more concise than using \"range\"\n\nversion 1.9.4 (January 
2016):\n\t- Completed changes to documentation in doc directory\n\t- Added control/inquiry of settings, e.g. set_max_blas_threads()\n\tand configuration()\n\nversion 1.9.3 (December 2015):\n\t- Added \"max\" and \"min\" as binary operators (note that \"maxval\"\n\tand \"minval\" are reduction operators as in Fortran)\n\nversion 1.9.2 (December 2015):\n\t- Added ActiveConstReference type for active constant references\n\nversion 1.9.1 (November 2015):\n\t- New matmul.h/matmul.cpp - not yet complete\n\nversion 1.9.0 (November 2015):\n\t- SUBSTANTIAL REWRITE TO INCORPORATE ARRAY FUNCTIONALITY\n\nversion 1.1 (June 2015):\n\t- Added ./configure script using autotools\n\t- Added support for additional mathematical functions: asinh,\n\tacosh, atanh, expm1, log1p, cbrt, erf, erfc, exp2, log2\n\t- Changed license from GNU General Public License to Apache\n\tLicense, Version 2.0\n\t- Jacobian calculation uses OpenMP parallelization\n\t- Removed multiscatter example code\n\t- New benchmarking program in benchmark/ that compares to other\n\tautomatic differentiation tools if available\n\t- Fixed bug so that gaps in the gradient list now merge properly\n\t- Provided capability to compile code without an external library,\n\tto facilitate porting to Windows\n\t- Added programs in test/ demonstrating checkpointing,\n\tthread-safety and compiling without an external library\n\nversion 1.0 (September 2013):\n\t- Very many internal changes and added features\n\t- Detailed documentation in the doc/ directory\n\t- Removed the LIFO requirement on the order with which aReal\n\tobjects ought to be created and destroyed\n\t- For users of version 0.9, the main change to the interface is\n\tthat the Stack::start() member function is no longer supported;\n\trather you should call the Stack::new_recording() member function\n\t*after* the independent variables have been initialized but\n\t*before* any mathematical operations are performed using them\n\nversion 0.9:\n\t- First public release\n"
  },
  {
    "path": "INSTALL",
    "content": "Installation Instructions\n*************************\n\nCopyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,\n2006, 2007 Free Software Foundation, Inc.\n\nThis file is free documentation; the Free Software Foundation gives\nunlimited permission to copy, distribute and modify it.\n\nBasic Installation\n==================\n\nBriefly, the shell commands `./configure; make; make install' should\nconfigure, build, and install this package.  The following\nmore-detailed instructions are generic; see the `README' file for\ninstructions specific to this package.\n\n   The `configure' shell script attempts to guess correct values for\nvarious system-dependent variables used during compilation.  It uses\nthose values to create a `Makefile' in each directory of the package.\nIt may also create one or more `.h' files containing system-dependent\ndefinitions.  Finally, it creates a shell script `config.status' that\nyou can run in the future to recreate the current configuration, and a\nfile `config.log' containing compiler output (useful mainly for\ndebugging `configure').\n\n   It can also use an optional file (typically called `config.cache'\nand enabled with `--cache-file=config.cache' or simply `-C') that saves\nthe results of its tests to speed up reconfiguring.  Caching is\ndisabled by default to prevent problems with accidental use of stale\ncache files.\n\n   If you need to do unusual things to compile the package, please try\nto figure out how `configure' could check whether to do them, and mail\ndiffs or instructions to the address given in the `README' so they can\nbe considered for the next release.  If you are using the cache, and at\nsome point `config.cache' contains results you don't want to keep, you\nmay remove or edit it.\n\n   The file `configure.ac' (or `configure.in') is used to create\n`configure' by a program called `autoconf'.  
You need `configure.ac' if\nyou want to change it or regenerate `configure' using a newer version\nof `autoconf'.\n\nThe simplest way to compile this package is:\n\n  1. `cd' to the directory containing the package's source code and type\n     `./configure' to configure the package for your system.\n\n     Running `configure' might take a while.  While running, it prints\n     some messages telling which features it is checking for.\n\n  2. Type `make' to compile the package.\n\n  3. Optionally, type `make check' to run any self-tests that come with\n     the package.\n\n  4. Type `make install' to install the programs and any data files and\n     documentation.\n\n  5. You can remove the program binaries and object files from the\n     source code directory by typing `make clean'.  To also remove the\n     files that `configure' created (so you can compile the package for\n     a different kind of computer), type `make distclean'.  There is\n     also a `make maintainer-clean' target, but that is intended mainly\n     for the package's developers.  If you use it, you may have to get\n     all sorts of other programs in order to regenerate files that came\n     with the distribution.\n\n  6. Often, you can also type `make uninstall' to remove the installed\n     files again.\n\nCompilers and Options\n=====================\n\nSome systems require unusual options for compilation or linking that the\n`configure' script does not know about.  Run `./configure --help' for\ndetails on some of the pertinent environment variables.\n\n   You can give `configure' initial values for configuration parameters\nby setting variables in the command line or in the environment.  
Here\nis an example:\n\n     ./configure CC=c99 CFLAGS=-g LIBS=-lposix\n\n   *Note Defining Variables::, for more details.\n\nCompiling For Multiple Architectures\n====================================\n\nYou can compile the package for more than one kind of computer at the\nsame time, by placing the object files for each architecture in their\nown directory.  To do this, you can use GNU `make'.  `cd' to the\ndirectory where you want the object files and executables to go and run\nthe `configure' script.  `configure' automatically checks for the\nsource code in the directory that `configure' is in and in `..'.\n\n   With a non-GNU `make', it is safer to compile the package for one\narchitecture at a time in the source code directory.  After you have\ninstalled the package for one architecture, use `make distclean' before\nreconfiguring for another architecture.\n\nInstallation Names\n==================\n\nBy default, `make install' installs the package's commands under\n`/usr/local/bin', include files under `/usr/local/include', etc.  You\ncan specify an installation prefix other than `/usr/local' by giving\n`configure' the option `--prefix=PREFIX'.\n\n   You can specify separate installation prefixes for\narchitecture-specific files and architecture-independent files.  If you\npass the option `--exec-prefix=PREFIX' to `configure', the package uses\nPREFIX as the prefix for installing programs and libraries.\nDocumentation and other data files still use the regular prefix.\n\n   In addition, if you use an unusual directory layout you can give\noptions like `--bindir=DIR' to specify different values for particular\nkinds of files.  
Run `configure --help' for a list of the directories\nyou can set and what kinds of files go in them.\n\n   If the package supports it, you can cause programs to be installed\nwith an extra prefix or suffix on their names by giving `configure' the\noption `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.\n\nOptional Features\n=================\n\nSome packages pay attention to `--enable-FEATURE' options to\n`configure', where FEATURE indicates an optional part of the package.\nThey may also pay attention to `--with-PACKAGE' options, where PACKAGE\nis something like `gnu-as' or `x' (for the X Window System).  The\n`README' should mention any `--enable-' and `--with-' options that the\npackage recognizes.\n\n   For packages that use the X Window System, `configure' can usually\nfind the X include and library files automatically, but if it doesn't,\nyou can use the `configure' options `--x-includes=DIR' and\n`--x-libraries=DIR' to specify their locations.\n\nSpecifying the System Type\n==========================\n\nThere may be some features `configure' cannot figure out automatically,\nbut needs to determine by the type of machine the package will run on.\nUsually, assuming the package is built to be run on the _same_\narchitectures, `configure' can figure that out, but if it prints a\nmessage saying it cannot guess the machine type, give it the\n`--build=TYPE' option.  TYPE can either be a short name for the system\ntype, such as `sun4', or a canonical name which has the form:\n\n     CPU-COMPANY-SYSTEM\n\nwhere SYSTEM can have one of these forms:\n\n     OS KERNEL-OS\n\n   See the file `config.sub' for the possible values of each field.  
If\n`config.sub' isn't included in this package, then this package doesn't\nneed to know the machine type.\n\n   If you are _building_ compiler tools for cross-compiling, you should\nuse the option `--target=TYPE' to select the type of system they will\nproduce code for.\n\n   If you want to _use_ a cross compiler, that generates code for a\nplatform different from the build platform, you should specify the\n\"host\" platform (i.e., that on which the generated programs will\neventually be run) with `--host=TYPE'.\n\nSharing Defaults\n================\n\nIf you want to set default values for `configure' scripts to share, you\ncan create a site shell script called `config.site' that gives default\nvalues for variables like `CC', `cache_file', and `prefix'.\n`configure' looks for `PREFIX/share/config.site' if it exists, then\n`PREFIX/etc/config.site' if it exists.  Or, you can set the\n`CONFIG_SITE' environment variable to the location of the site script.\nA warning: not all `configure' scripts look for a site script.\n\nDefining Variables\n==================\n\nVariables not defined in a site shell script can be set in the\nenvironment passed to `configure'.  However, some packages may run\nconfigure again during the build, and the customized values of these\nvariables may be lost.  In order to avoid this problem, you should set\nthem in the `configure' command line, using `VAR=value'.  For example:\n\n     ./configure CC=/usr/local2/bin/gcc\n\ncauses the specified `gcc' to be used as the C compiler (unless it is\noverridden in the site shell script).\n\nUnfortunately, this technique does not work for `CONFIG_SHELL' due to\nan Autoconf bug.  
Until the bug is fixed you can use this workaround:\n\n     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash\n\n`configure' Invocation\n======================\n\n`configure' recognizes the following options to control how it operates.\n\n`--help'\n`-h'\n     Print a summary of the options to `configure', and exit.\n\n`--version'\n`-V'\n     Print the version of Autoconf used to generate the `configure'\n     script, and exit.\n\n`--cache-file=FILE'\n     Enable the cache: use and save the results of the tests in FILE,\n     traditionally `config.cache'.  FILE defaults to `/dev/null' to\n     disable caching.\n\n`--config-cache'\n`-C'\n     Alias for `--cache-file=config.cache'.\n\n`--quiet'\n`--silent'\n`-q'\n     Do not print messages saying which checks are being made.  To\n     suppress all normal output, redirect it to `/dev/null' (any error\n     messages will still be shown).\n\n`--srcdir=DIR'\n     Look for the package's source code in directory DIR.  Usually\n     `configure' can determine that directory automatically.\n\n`configure' also accepts some other, not widely useful, options.  Run\n`configure --help' for more details.\n\n"
  },
  {
    "path": "Makefile.am",
    "content": "dist_pkgdata_DATA = README.md\npkgdata_DATA = COPYING ChangeLog NEWS AUTHORS\nSUBDIRS = adept include benchmark test\n# The test/ directory does not use automake so we need to specify the\n# files that will be included in the distribution\nEXTRA_DIST = test/Makefile test/README test/*.cpp test/*.h test/run_tests.sh \\\n\tdoc/Makefile doc/README doc/COPYING doc/*.tex \nACLOCAL_AMFLAGS = -I m4\n"
  },
  {
    "path": "NEWS",
    "content": "version 2.0\n\t- Fixed pausable recording and library-free compilation to provide full backwards compatibility with version 1.1\n\t- C++11 features such as initializer lists\n\t- Automatic vectorization of passive array statements if possible\n\t- Additional mathematical functions: round, trunc, rint, nearbyint, atan2, fmin, fmax\n\t- Additional array operations: spread, outer_product, count, maxval, minval, reshape\n\t- Many more test programs\n\nversion 1.9.8 (April 2016)\n\t- First beta release of version 2.0 incorporating array capability up to 7 dimensions\n\t- Matrix multiplication and basic linear algebra from BLAS and LAPACK\n\t- Options for thread-safe accessing of arrays\n\nversion 1.1 (June 2015)\n\t- Added ./configure script\n\t- Added support for additional mathematical functions: asinh, acosh, atanh, expm1, log1p, cbrt, erf, erfc, exp2, log2\n\t- License changed to Apache License, Version 2.0"
  },
  {
    "path": "README.md",
    "content": "# Adept 2: Combined array and automatic differentiation library in C++\n\n## Introduction\n\nThe Adept version 2.1 software library provides three different\nfunctionalities:\n\n* Its automatic differentiation capability enables algorithms written\n  in C++ to be differentiated with little code modification, very\n  useful for a wide range of applications that involve mathematical\n  optimization. It is backwards compatible with and as fast as Adept\n  1.1. The name \"Adept\" refers to \"Automatic Differentiation using\n  Expression Templates\".\n\n* Its array capability provides support for vectors, matrices, arrays\n  of up to 7 dimensions and linear algebra. Adept 2 uses a single\n  expression-template framework under the hood to enable array\n  operations to be differentiated with very good computational\n  performance.\n\n* Its optimization capability provides the various minimization\n  algorithms (Levenberg, Levenberg-Marquardt, Conjugate Gradient and\n  Limited Memory BFGS) each of which can be used with or without box\n  constraints on the state variables. The interface to the\n  optimization functionality is in terms of Adept vectors and matrices.\n\nIf you are not interested in the array or optimization capabilities of\nAdept 2 then Adept 1.1 may be more to your liking as a very\nlightweight library that has virtually all the\nautomatic-differentiation capabilities of version 2.\n\n\n## Documentation and links\n\n* The [Adept web site](http://www.met.reading.ac.uk/clouds/adept/) for formal Adept releases\n* The [Adept-2 GitHub page](https://github.com/rjhogan/Adept-2) for the latest snapshot\n* The [Adept-1.1 GitHub page](https://github.com/rjhogan/Adept) for the older (scalar) library\n* A detailed [User Guide](http://www.met.reading.ac.uk/clouds/adept/adept_documentation.pdf)\n* A paper describing the automatic differentiation capability: [Hogan, R. J., 2014: Fast reverse-mode automatic differentiation using expression templates in C++. 
*ACM Trans. Math. Softw.* **40,** 26:1-26:16](http://www.met.reading.ac.uk/~swrhgnrj/publications/adept.pdf)\n* The [Adept Wikipedia page](https://en.wikipedia.org/wiki/Adept_(C++_library))\n* Bug fixes, and queries not answered by the documentation, should be addressed to Robin Hogan (r.j.hogan at ecmwf.int)\n\n## Installation\n\nTo build Adept from a GitHub snapshot, first do the following to\nrecreate the configure script (requiring the autotools package):\n\n    autoreconf -i\n\nFormal release packages already contain a configure script. The normal\nbuild sequence is then:\n\n    ./configure\n    make\n    make check\n    make install\n\nPlease consult the User Guide for further installation options; in\nparticular, if you plan to make serious use of matrix multiplication\nand linear algebra then you should compile Adept to use an optimized\nBLAS library such as OpenBLAS.\n\n\n## License and copyright\n\nThe code in this package has a mix of copyright owners:\n\nCopyright (C) 2012-2015 University of Reading\n\nCopyright (C) 2015-     European Centre for Medium-Range Weather Forecasts\n\nTwo licenses are used for the code in this package:\n\n* The files that form the Adept library are distributed under the\n  conditions of the Apache License, Version 2 - see the COPYING file\n  for details.  This is a permissive free-software license but one\n  that does impose a few conditions if you intend to distribute\n  derivative works.  The files this license applies to are those in\n  the include/ and adept/ directories, and the subdirectories below\n  them.\n\n* All code in the test/ and benchmark/ directories is subject to the\n  terms of the GNU all-permissive license, given at the top of those\n  files - basically you can do what you like with the code from these\n  files.\n\nIf you use Adept in published scientific work then it is requested\nthat you cite the Hogan (2014) paper above, but this is not a\ncondition of the license.\n"
  },
  {
    "path": "TODO",
    "content": "BUGS\nspread<DIM> function does not use the right DIM\n\nDESIRABLE BUT NEEDS NEW STACK\nDifferentiated BLAS operations on symmetric matrices etc\nImplement general OpenMP for forward pass\n\nOPTIMIZATION\nVectorize active expressions\nFix vectorization of spread and outer_product by storing pointer to start of row and not using index\nCommunicate band diagonals statically to optimize Array = band expression (e.g. 2*TridiagMatrix)\nImplement active scalar precomputation\nOptimize reciprocal to use 1.0 or 1.0f; vectorize\nOptimize storage of data range\nSquareMatrix::is_vectorizable = true\n\nFEATURES\nlong double calls double matmul functions?\nstd::string configuration function returning options for this compilation unit\nMathematical functions copysign, fdim, hypot, remainder?\nImplement user elemental function\nImplement user choice of Jacobian array ordering\nClean-up benchmark and test_arrays/test_array_speed code\nCheck can do Array<*,Active<Real>,false>\nRename ExpressionSize\nEnable functions taking ExpressionSize arguments (e.g. resize and array constructor) to take equivalent arguments, e.g. std::vector, initializer lists etc\nFall-back if BLAS not available\nImplement pow<int> and sqr\nImplement non-member functions merge?, reshape, shape?, size, [un]pack(?), minloc, maxloc\nImplement matlab-like tile (generic repmat) plus zeros and ones\nImplement iterators\nTriangular/symmetric views\nConst link does not increment reference counter\nCannot link non-const to const either by construction or explicit link\nShould reduce functions take dimensions as template arguments?\nreduce operations have a template version with the reduce dimension provided statically\ndifferentiate complex number operations\nmatmul and solve on complex numbers\ncomplex functions arg, abs, real, imag etc\n\nCHECK\nCheck Square matmul\nAll vectorization combinations work, e.g. 
double/int, aligned/unaligned LHS\nSet whole arrays as independent/dependent\nReduce RMS difference in Toon case\n\nCLEAN\nReferences to OpenMP for array operations - remove?\n\nDOCUMENTATION\nDocument diag_vector non-member function (in reduce.h) and test in test_arrays\n\nOLDER IDEAS\nClarify vector orientation when in matrix multiplication\nVector orientation changed with row(), col()?\nImplement move semantics and make copy constructors do deep copy ADEPT_***\nImplement OpenMP passive array operations\nImplement OpenMP active array operations\nLink can only be performed on empty object\n\n\nIf new Expression types are to be added, they should provide the\nfollowing interface:\n\n      static const int  rank_      = 0;\n      static const int  n_scratch_ = 0;\n      static const int  n_active_ = 0;\n      static const int  n_arrays_ = 0;\n      static const bool is_active_ = false;\n      static const bool is_vectorizable_ = true;\n\n      bool get_dimensions_(ExpressionSize<0>& dim) const;\n\n      std::string expression_string_() const;\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const;\n\n      Type value_with_len_(const Index& j, const Index& len) const;\n\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const;\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const;\n\n      template <int MyArrayNum, int NArrays>\n      Packet<Type>\n      packet_at_location_(const ExpressionSize<NArrays>& loc) const;\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const;\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const;\n\n      template <int 
MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const;\n\n      template <int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  const MyType& multiplier) const;\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const;\n"
  },
  {
    "path": "adept/Array.cpp",
    "content": "/* Array.cpp -- Functions and global variables controlling array behaviour\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n\n#include <adept/Array.h>\n\nnamespace adept {\n  namespace internal {\n    bool array_row_major_order = true;\n    //    bool array_print_curly_brackets = true;\n\n    // Variables describing how arrays are written to a stream\n    ArrayPrintStyle array_print_style = PRINT_STYLE_CURLY;\n    std::string vector_separator = \", \";\n    std::string vector_print_before = \"{\";\n    std::string vector_print_after = \"}\";\n    std::string array_opening_bracket = \"{\";\n    std::string array_closing_bracket = \"}\";\n    std::string array_contiguous_separator = \", \";\n    std::string array_non_contiguous_separator = \",\\n\";\n    std::string array_print_before = \"\\n{\";\n    std::string array_print_after = \"}\";\n    std::string array_print_empty_before = \"(empty rank-\";\n    std::string array_print_empty_after = \" array)\";\n    bool array_print_indent = true;\n    bool array_print_empty_rank = true;\n  }\n\n  void set_array_print_style(ArrayPrintStyle ps) {\n    using namespace internal;\n    switch (ps) {\n    case PRINT_STYLE_PLAIN:\n       vector_separator = \" \";\n       vector_print_before = \"\";\n       vector_print_after = \"\";\n       array_opening_bracket = \"\";\n       array_closing_bracket = \"\";\n       array_contiguous_separator = \" \";\n       array_non_contiguous_separator = \"\\n\";\n       array_print_before = \"\";\n       array_print_after = \"\";\n       array_print_empty_before = \"(empty rank-\";\n       array_print_empty_after = \" array)\";\n       array_print_indent = false;\n       array_print_empty_rank = true;\n       break;\n    case PRINT_STYLE_CSV:\n       vector_separator = \", \";\n       vector_print_before = \"\";\n       vector_print_after = \"\";\n    
   array_opening_bracket = \"\";\n       array_closing_bracket = \"\";\n       array_contiguous_separator = \", \";\n       array_non_contiguous_separator = \"\\n\";\n       array_print_before = \"\";\n       array_print_after = \"\";\n       array_print_empty_before = \"empty\";\n       array_print_empty_after = \"\";\n       array_print_indent = false;\n       array_print_empty_rank = false;\n       break;\n    case PRINT_STYLE_MATLAB:\n       vector_separator = \" \";\n       vector_print_before = \"[\";\n       vector_print_after = \"]\";\n       array_opening_bracket = \"[\";\n       array_closing_bracket = \"]\";\n       array_contiguous_separator = \" \";\n       array_non_contiguous_separator = \";\\n\";\n       array_print_before = \"[\";\n       array_print_after = \"]\";\n       array_print_empty_before = \"[\";\n       array_print_empty_after = \"]\";\n       array_print_indent = true;\n       array_print_empty_rank = false;\n       break;\n    case PRINT_STYLE_CURLY:\n       vector_separator = \", \";\n       vector_print_before = \"{\";\n       vector_print_after = \"}\";\n       array_opening_bracket = \"{\";\n       array_closing_bracket = \"}\";\n       array_contiguous_separator = \", \";\n       array_non_contiguous_separator = \",\\n\";\n       array_print_before = \"\\n{\";\n       array_print_after = \"}\";\n       array_print_empty_before = \"(empty rank-\";\n       array_print_empty_after = \" array)\";\n       array_print_indent = true;\n       array_print_empty_rank = true;\n       break;\n    default:\n      throw invalid_operation(\"Array print style not understood\");\n    }\n    array_print_style = ps;\n  }\n\n}\n"
  },
  {
    "path": "adept/Makefile.am",
    "content": "lib_LTLIBRARIES = libadept.la\nlibadept_la_SOURCES = Array.cpp Stack.cpp StackStorageOrig.cpp \\\n\tjacobian.cpp Storage.cpp index.cpp settings.cpp \\\n\tcppblas.cpp cpplapack.h solve.cpp inv.cpp \\\n\tvector_utilities.cpp Minimizer.cpp \\\n\tminimize_limited_memory_bfgs.cpp minimize_levenberg_marquardt.cpp \\\n\tminimize_conjugate_gradient.cpp line_search.cpp\n\nlibadept_la_CPPFLAGS = -I@top_srcdir@/include\n"
  },
  {
    "path": "adept/Minimizer.cpp",
    "content": "/* Minimizer.h -- class for minimizing the cost function of an optimizable object\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <cctype>\n\n#include <adept/Minimizer.h>\n#include <adept/exception.h>\n\nnamespace adept {\n\n  // List of the names of available minimizer algorithms\n  static const char* minimizer_algorithm_names_[]\n    = {\"L-BFGS\",\n       \"Conjugate-Gradient\",\n       \"Conjugate-Gradient-FR\",\n       \"Levenberg\",\n       \"Levenberg-Marquardt\"};\n\n  // Lower-case versions of the list above\n  static const char* minimizer_algorithm_lower_names_[]\n    = {\"l-bfgs\",\n       \"conjugate-gradient\",\n       \"conjugate-gradient-fr\",\n       \"levenberg\",\n       \"levenberg-marquardt\"};\n\n  // Convert to lower case, and convert spaces and underscores to\n  // hyphens. This function is used to do a case-insensitive\n  // string-based selection of the minimizer algorithm to use.\n  static void to_lower_in_place(std::string& str) {\n    for (std::string::size_type istr = 0; istr < str.size(); ++istr) {\n      str[istr] = std::tolower(str[istr]);\n      if (str[istr] == ' ' || str[istr] == '_') {\n\tstr[istr] = '-';\n      }\n    }\n  }\n\n  // Return a C string describing the minimizer status\n  const char*\n  minimizer_status_string(MinimizerStatus status)\n  {\n    switch (status) {\n    case MINIMIZER_STATUS_SUCCESS:\n      return \"Converged\";\n      break;\n    case MINIMIZER_STATUS_EMPTY_STATE:\n      return \"Empty state vector, no minimization performed\";\n      break;\n    case MINIMIZER_STATUS_MAX_ITERATIONS_REACHED:\n      return \"Maximum iterations reached\";\n      break;\n    case MINIMIZER_STATUS_FAILED_TO_CONVERGE:\n      return \"Failed to converge\";\n      break;\n    case MINIMIZER_STATUS_DIRECTION_UPHILL:\n      return \"Search direction points uphill\";\n     
 break;\n    case MINIMIZER_STATUS_BOUND_REACHED:\n      return \"Bound reached\"; // Should not be returned from a minimize function\n      break;\n    case MINIMIZER_STATUS_INVALID_COST_FUNCTION:\n      return \"Non-finite cost function\";\n      break;\n    case MINIMIZER_STATUS_INVALID_GRADIENT:\n      return \"Non-finite gradient\";\n      break;\n    case MINIMIZER_STATUS_INVALID_BOUNDS:\n      return \"Invalid bounds for bounded minimization\";\n      break;\n    case MINIMIZER_STATUS_NOT_YET_CONVERGED:\n      return \"Minimization still in progress\";\n      break;\n    default:\n      return \"Status unrecognized\";\n    }\n  }\n\n  // Case-insensitive setting of the miminization algorithm given its\n  // name\n  void\n  Minimizer::set_algorithm(const std::string& algo) {\n    std::string algo_lower = algo;\n    to_lower_in_place(algo_lower);\n\n    std::cout << \"Checking \\\"\" << algo_lower << \"\\\"\\n\";\n\n    for (int ialgo = 0;\n\t ialgo < static_cast<int>(MINIMIZER_ALGORITHM_NUMBER_AVAILABLE);\n\t ++ialgo) {\n      if (algo_lower == minimizer_algorithm_lower_names_[ialgo]) {\n\tset_algorithm(static_cast<MinimizerAlgorithm>(ialgo));\n\treturn;\n      }\n    }\n    throw optimization_exception(\"Algorithm name not understood\");\n  }\n\n  std::string\n  Minimizer::algorithm_name() {\n    int ialgo = static_cast<MinimizerAlgorithm>(algorithm_);\n    if (ialgo >= 0 && ialgo < MINIMIZER_ALGORITHM_NUMBER_AVAILABLE) {\n      return minimizer_algorithm_names_[ialgo];\n    }\n    else {\n      return \"Unknown\";\n    }\n  }\n\n  // Unconstrained minimization\n  MinimizerStatus\n  Minimizer::minimize(Optimizable& optimizable, Vector x)\n  {\n    if (minimizer_algorithm_order(algorithm_) > 1\n\t&& !optimizable.provides_derivative(2)) {\n      throw optimization_exception(\"2nd-order minimization algorithm requires optimizable that can provide 2nd derivatives\");\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS) {\n      return 
minimize_limited_memory_bfgs(optimizable, x);\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT) {\n      return minimize_conjugate_gradient(optimizable, x);\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR) {\n      return minimize_conjugate_gradient(optimizable, x, true);\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG) {\n      return minimize_levenberg_marquardt(optimizable, x, true);\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT) {\n      return minimize_levenberg_marquardt(optimizable, x, false);\n    }\n    else {\n      throw optimization_exception(\"Minimization algorithm not recognized\");\n    }\n  }\n\n  // Constrained minimization\n  MinimizerStatus\n  Minimizer::minimize(Optimizable& optimizable, Vector x,\n\t\t      const Vector& x_lower, const Vector& x_upper)\n  {\n    if (minimizer_algorithm_order(algorithm_) > 1\n\t&& !optimizable.provides_derivative(2)) {\n      throw optimization_exception(\"2nd-order minimization algorithm requires optimizable that can provide 2nd derivatives\");\n    }\n    if (algorithm_ == MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS) {\n      return minimize_limited_memory_bfgs_bounded(optimizable, x,\n\t\t\t\t\t\t  x_lower, x_upper);\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT) {\n      return minimize_conjugate_gradient_bounded(optimizable, x,\n\t\t\t\t\t\t x_lower, x_upper);\n    }\n    else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR) {\n      return minimize_conjugate_gradient_bounded(optimizable, x,\n\t\t\t\t\t\t x_lower, x_upper, true);\n    }\n    if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG) {\n      return minimize_levenberg_marquardt_bounded(optimizable, x,\n\t\t\t\t\t\t  x_lower, x_upper, true);\n    }\n    if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT) {\n      return minimize_levenberg_marquardt_bounded(optimizable, x,\n\t\t\t\t\t\t  x_lower, x_upper, 
false);\n    }\n    else {\n      throw optimization_exception(\"Constrained minimization algorithm not recognized\");\n    }\n  }\n\n};\n"
  },
  {
    "path": "adept/Stack.cpp",
    "content": "/* Stack.cpp -- Stack for storing automatic differentiation information\n\n     Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n\n#include <iostream>\n#include <cstring> // For memcpy\n\n\n#ifdef _OPENMP\n#include <omp.h>\n#endif\n\n#include <adept/Stack.h>\n\n\nnamespace adept {\n\n  using namespace internal;\n\n  // Global pointers to the current thread, the second of which is\n  // thread safe. The first is only used if ADEPT_STACK_THREAD_UNSAFE\n  // is defined.\n  ADEPT_THREAD_LOCAL Stack* _stack_current_thread = 0;\n  Stack* _stack_current_thread_unsafe = 0;\n\n  // MEMBER FUNCTIONS OF THE STACK CLASS\n\n  // Destructor: frees dynamically allocated memory (if any)\n  Stack::~Stack() {\n    // If this is the currently active stack then set to NULL as\n    // \"this\" is shortly to become invalid\n    if (is_thread_unsafe_) {\n      if (_stack_current_thread_unsafe == this) {\n\t_stack_current_thread_unsafe = 0; \n      }\n    }\n    else if (_stack_current_thread == this) {\n      _stack_current_thread = 0; \n    }\n#ifndef ADEPT_STACK_STORAGE_STL\n    if (gradient_) {\n      delete[] gradient_;\n    }\n#endif\n  }\n  \n  // Make this stack \"active\" by copying its \"this\" pointer to a\n  // global variable; this makes it the stack that aReal objects\n  // subsequently interact with when being created and participating\n  // in mathematical expressions\n  void\n  Stack::activate()\n  {\n    // Check that we don't already have an active stack in this thread\n    if ((is_thread_unsafe_ && _stack_current_thread_unsafe \n\t && _stack_current_thread_unsafe != this)\n\t|| ((!is_thread_unsafe_) && _stack_current_thread\n\t    && _stack_current_thread != this)) {\n      throw(stack_already_active());\n    }\n    else {\n      if (!is_thread_unsafe_) {\n\t_stack_current_thread 
= this;\n      }\n      else {\n\t_stack_current_thread_unsafe = this;\n      }\n    }    \n  }\n\n  \n  // Set the maximum number of threads to be used in Jacobian\n  // calculations, if possible. A value of 1 indicates that OpenMP\n  // will not be used, while a value of 0 indicates that the number\n  // will match the number of available processors. Returns the\n  // maximum that will be used, which will be 1 if the Adept library\n  // was compiled without OpenMP support. Note that a value of 1 will\n  // disable the use of OpenMP with Adept, so Adept will then use no\n  // OpenMP directives or function calls. Note that if in your program\n  // you use OpenMP with each thread performing automatic\n  // differentiaion with its own independent Adept stack, then\n  // typically only one OpenMP thread is available for each Jacobian\n  // calculation, regardless of whether you call this function.\n  int\n  Stack::set_max_jacobian_threads(int n)\n  {\n#ifdef _OPENMP\n    if (have_openmp_) {\n      if (n == 1) {\n\topenmp_manually_disabled_ = true;\n\treturn 1;\n      }\n      else if (n < 1) {\n\topenmp_manually_disabled_ = false;\n\tomp_set_num_threads(omp_get_num_procs());\n\treturn omp_get_max_threads();\n      }\n      else {\n\topenmp_manually_disabled_ = false;\n\tomp_set_num_threads(n);\n\treturn omp_get_max_threads();\n      }\n    }\n#endif\n    return 1;\n  }\n\n\n  // Return maximum number of OpenMP threads to be used in Jacobian\n  // calculation\n  int \n  Stack::max_jacobian_threads() const\n  {\n#ifdef _OPENMP\n    if (have_openmp_) {\n      if (openmp_manually_disabled_) {\n\treturn 1;\n      }\n      else {\n\treturn omp_get_max_threads();\n      }\n    }\n#endif\n    return 1;\n  }\n\n\n  // Perform to adjoint computation (reverse mode). 
It is assumed that\n  // some gradients have been assigned already, otherwise the function\n  // returns with an error.\n  void\n  Stack::compute_adjoint()\n  {\n    if (gradients_are_initialized()) {\n      // Loop backwards through the derivative statements\n      for (uIndex ist = n_statements_-1; ist > 0; ist--) {\n\tconst Statement& statement = statement_[ist];\n\t// We copy the RHS gradient (LHS in the original derivative\n\t// statement but swapped in the adjoint equivalent) to \"a\" in\n\t// case it appears on the LHS in any of the following statements\n\tReal a = gradient_[statement.index];\n\tgradient_[statement.index] = 0.0;\n\t// By only looping if a is non-zero we gain a significant speed-up\n\tif (a != 0.0) {\n\t  // Loop over operations\n\t  for (uIndex i = statement_[ist-1].end_plus_one;\n\t       i < statement.end_plus_one; i++) {\n\t    gradient_[index_[i]] += multiplier_[i]*a;\n\t  }\n\t}\n      }\n    }  \n    else {\n      throw(gradients_not_initialized());\n    }  \n  }\n\n\n  // Perform tangent linear computation (forward mode). 
It is assumed\n  // that some gradients have been assigned already, otherwise the\n  // function returns with an error.\n  void\n  Stack::compute_tangent_linear()\n  {\n    if (gradients_are_initialized()) {\n      // Loop forward through the statements\n      for (uIndex ist = 1; ist < n_statements_; ist++) {\n\tconst Statement& statement = statement_[ist];\n\t// We copy the LHS to \"a\" in case it appears on the RHS in any\n\t// of the following statements\n\tReal a = 0.0;\n\tfor (uIndex i = statement_[ist-1].end_plus_one;\n\t     i < statement.end_plus_one; i++) {\n\t  a += multiplier_[i]*gradient_[index_[i]];\n\t}\n\tgradient_[statement.index] = a;\n      }\n    }\n    else {\n      throw(gradients_not_initialized());\n    }\n  }\n\n\n\n  // Register n gradients\n  uIndex\n  Stack::do_register_gradients(const uIndex& n) {\n    n_gradients_registered_ += n;\n    if (!gap_list_.empty()) {\n      uIndex return_val;\n      // Insert in a gap, if there is one big enough\n      for (GapListIterator it = gap_list_.begin();\n\t   it != gap_list_.end(); it++) {\n\tuIndex len = it->end + 1 - it->start;\n\tif (len > n) {\n\t  // Gap a bit larger than needed: reduce its size\n\t  return_val = it->start;\n\t  it->start += n;\n\t  return return_val;\n\t}\n\telse if (len == n) {\n\t  // Gap exactly the size needed: fill it and remove from list\n\t  return_val = it->start;\n\t  if (most_recent_gap_ == it) {\n\t    gap_list_.erase(it);\n\t    most_recent_gap_ = gap_list_.end();\n\t  }\n\t  else {\n\t    gap_list_.erase(it);\n\t  }\n\t  return return_val;\n\t}\n      }\n    }\n    // No suitable gap found; instead add to end of gradient vector\n    i_gradient_ += n;\n    if (i_gradient_ > max_gradient_) {\n      max_gradient_ = i_gradient_;\n    }\n    return i_gradient_ - n;\n  }\n  \n\n  // If an aReal object is deleted, its gradient_index is\n  // unregistered from the stack.  
If this is at the top of the stack\n  // then this is easy and is done inline; this is the usual case\n  // since C++ trys to deallocate automatic objects in the reverse\n  // order to that in which they were allocated.  If it is not at the\n  // top of the stack then a non-inline function is called to ensure\n  // that the gap list is adjusted correctly.\n  void\n  Stack::unregister_gradient_not_top(const uIndex& gradient_index)\n  {\n    enum {\n      ADDED_AT_BASE,\n      ADDED_AT_TOP,\n      NEW_GAP,\n      NOT_FOUND\n    } status = NOT_FOUND;\n    // First try to find if the unregistered element is at the\n    // start or end of an existing gap\n    if (!gap_list_.empty() && most_recent_gap_ != gap_list_.end()) {\n      // We have a \"most recent\" gap - check whether the gradient\n      // to be unregistered is here\n      Gap& current_gap = *most_recent_gap_;\n      if (gradient_index == current_gap.start - 1) {\n\tcurrent_gap.start--;\n\tstatus = ADDED_AT_BASE;\n      }\n      else if (gradient_index == current_gap.end + 1) {\n\tcurrent_gap.end++;\n\tstatus = ADDED_AT_TOP;\n      }\n      // Should we check for erroneous removal from middle of gap?\n    }\n    if (status == NOT_FOUND) {\n      // Search other gaps\n      for (GapListIterator it = gap_list_.begin();\n\t   it != gap_list_.end(); it++) {\n\tif (gradient_index <= it->end + 1) {\n\t  // Gradient to unregister is either within the gap\n\t  // referenced by iterator \"it\", or it is between \"it\"\n\t  // and the previous gap in the list\n\t  if (gradient_index == it->start - 1) {\n\t    status = ADDED_AT_BASE;\n\t    it->start--;\n\t    most_recent_gap_ = it;\n\t  }\n\t  else if (gradient_index == it->end + 1) {\n\t    status = ADDED_AT_TOP;\n\t    it->end++;\n\t    most_recent_gap_ = it;\n\t  }\n\t  else {\n\t    // Insert a new gap of width 1; note that list::insert\n\t    // inserts *before* the specified location\n\t    most_recent_gap_\n\t      = gap_list_.insert(it, 
Gap(gradient_index));\n\t    status = NEW_GAP;\n\t  }\n\t  break;\n\t}\n      }\n      if (status == NOT_FOUND) {\n\tgap_list_.push_back(Gap(gradient_index));\n\tmost_recent_gap_ = gap_list_.end();\n\tmost_recent_gap_--;\n      }\n    }\n    // Finally check if gaps have merged\n    if (status == ADDED_AT_BASE\n\t&& most_recent_gap_ != gap_list_.begin()) {\n      // Check whether the gap has merged with the next one\n      GapListIterator it = most_recent_gap_;\n      it--;\n      if (it->end == most_recent_gap_->start - 1) {\n\t// Merge two gaps\n\tmost_recent_gap_->start = it->start;\n\tgap_list_.erase(it);\n      }\n    }\n    else if (status == ADDED_AT_TOP) {\n      GapListIterator it = most_recent_gap_;\n      it++;\n      if (it != gap_list_.end()\n\t  && it->start == most_recent_gap_->end + 1) {\n\t// Merge two gaps\n\tmost_recent_gap_->end = it->end;\n\tgap_list_.erase(it);\n      }\n    }\n  }\t\n\n\n  // Unregister n gradients starting at gradient_index\n  void\n  Stack::unregister_gradients(const uIndex& gradient_index,\n\t\t\t      const uIndex& n)\n  {\n    n_gradients_registered_ -= n;\n    if (gradient_index+n == i_gradient_) {\n      // Gradient to be unregistered is at the top of the stack\n      i_gradient_ -= n;\n      if (!gap_list_.empty()) {\n\tGap& last_gap = gap_list_.back();\n\tif (i_gradient_ == last_gap.end+1) {\n\t  // We have unregistered the elements between the \"gap\" of\n\t  // unregistered element and the top of the stack, so can set\n\t  // the variables indicating the presence of the gap to zero\n\t  i_gradient_ = last_gap.start;\n\t  GapListIterator it = gap_list_.end();\n\t  it--;\n\t  if (most_recent_gap_ == it) {\n\t    most_recent_gap_ = gap_list_.end();\n\t  }\n\t  gap_list_.pop_back();\n\t}\n      }\n    }\n    else { // Gradients to be unregistered not at top of stack.\n      enum {\n\tADDED_AT_BASE,\n\tADDED_AT_TOP,\n\tNEW_GAP,\n\tNOT_FOUND\n      } status = NOT_FOUND;\n      // First try to find if the unregistered 
element is at the start\n      // or end of an existing gap\n      if (!gap_list_.empty() && most_recent_gap_ != gap_list_.end()) {\n\t// We have a \"most recent\" gap - check whether the gradient\n\t// to be unregistered is here\n\tGap& current_gap = *most_recent_gap_;\n\tif (gradient_index == current_gap.start - n) {\n\t  current_gap.start -= n;\n\t  status = ADDED_AT_BASE;\n\t}\n\telse if (gradient_index == current_gap.end + 1) {\n\t  current_gap.end += n;\n\t  status = ADDED_AT_TOP;\n\t}\n\t/*\n\telse if (gradient_index > current_gap.start - n\n\t\t && gradient_index < current_gap.end + 1) {\n\t  std::cout << \"** Attempt to find \" << gradient_index << \" in gaps \";\n\t  print_gaps();\n\t  std::cout << \"\\n\";\n\t  throw invalid_operation(\"Gap list corruption\");\n\t}\n\t*/\n\t// Should we check for erroneous removal from middle of gap?\n      }\n      if (status == NOT_FOUND) {\n\t// Search other gaps\n\tfor (GapListIterator it = gap_list_.begin();\n\t     it != gap_list_.end(); it++) {\n\t  if (gradient_index <= it->end + 1) {\n\t    // Gradient to unregister is either within the gap\n\t    // referenced by iterator \"it\", or it is between \"it\" and\n\t    // the previous gap in the list\n\t    if (gradient_index == it->start - n) {\n\t      status = ADDED_AT_BASE;\n\t      it->start -= n;\n\t      most_recent_gap_ = it;\n\t    }\n\t    else if (gradient_index == it->end + 1) {\n\t      status = ADDED_AT_TOP;\n\t      it->end += n;\n\t      most_recent_gap_ = it;\n\t    }\n\t    /*\n\t    else if (gradient_index > it->start - n) {\n\t      std::cout << \"*** Attempt to find \" << gradient_index << \" in gaps \";\n\t      print_gaps();\n\t      std::cout << \"\\n\";\n\t      throw invalid_operation(\"Gap list corruption\");\n\t    }\n\t    */\n\t    else {\n\t      // Insert a new gap; note that list::insert inserts\n\t      // *before* the specified location\n\t      most_recent_gap_\n\t\t= gap_list_.insert(it, Gap(gradient_index,\n\t\t\t\t\t   
gradient_index+n-1));\n\t      status = NEW_GAP;\n\t    }\n\t    break;\n\t  }\n\t}\n\tif (status == NOT_FOUND) {\n\t  gap_list_.push_back(Gap(gradient_index,\n\t\t\t\t  gradient_index+n-1));\n\t  most_recent_gap_ = gap_list_.end();\n\t  most_recent_gap_--;\n\t}\n      }\n      // Finally check if gaps have merged\n      if (status == ADDED_AT_BASE\n\t  && most_recent_gap_ != gap_list_.begin()) {\n\t// Check whether the gap has merged with the next one\n\tGapListIterator it = most_recent_gap_;\n\tit--;\n\tif (it->end == most_recent_gap_->start - 1) {\n\t  // Merge two gaps\n\t  most_recent_gap_->start = it->start;\n\t  gap_list_.erase(it);\n\t}\n      }\n      else if (status == ADDED_AT_TOP) {\n\tGapListIterator it = most_recent_gap_;\n\n\tit++;\n\tif (it != gap_list_.end()\n\t    && it->start == most_recent_gap_->end + 1) {\n\t  // Merge two gaps\n\t  most_recent_gap_->end = it->end;\n\t  gap_list_.erase(it);\n\t}\n      }\n    }\n  }\n  \n  \n  // Print each derivative statement to the specified stream (standard\n  // output if omitted)\n  void\n  Stack::print_statements(std::ostream& os) const\n  {\n    for (uIndex ist = 1; ist < n_statements_; ist++) {\n      const Statement& statement = statement_[ist];\n      os << ist\n\t\t<< \": d[\" << statement.index\n\t\t<< \"] = \";\n      \n      if (statement_[ist-1].end_plus_one == statement_[ist].end_plus_one) {\n\tos << \"0\\n\";\n      }\n      else {    \n\tfor (uIndex i = statement_[ist-1].end_plus_one;\n\t     i < statement.end_plus_one; i++) {\n\t  os << \" + \" << multiplier_[i] << \"*d[\" << index_[i] << \"]\";\n\t}\n\tos << \"\\n\";\n      }\n    }\n  }\n  \n  // Print the current gradient list to the specified stream (standard\n  // output if omitted)\n  bool\n  Stack::print_gradients(std::ostream& os) const\n  {\n    if (gradients_are_initialized()) {\n      for (uIndex i = 0; i < max_gradient_; i++) {\n\tif (i%10 == 0) {\n\t  if (i != 0) {\n\t    os << \"\\n\";\n\t  }\n\t  os << i << \":\";\n\t}\n\tos 
<< \" \" << gradient_[i];\n      }\n      os << \"\\n\";\n      return true;\n    }\n    else {\n      os << \"No gradients initialized\\n\";\n      return false;\n    }\n  }\n\n  // Print the list of gaps in the gradient list to the specified\n  // stream (standard output if omitted)\n  void\n  Stack::print_gaps(std::ostream& os) const\n  {\n    for (std::list<Gap>::const_iterator it = gap_list_.begin();\n\t it != gap_list_.end(); it++) {\n      os << it->start << \"-\" << it->end << \" \";\n    }\n  }\n\n\n#ifndef ADEPT_STACK_STORAGE_STL\n  // Initialize the vector of gradients ready for the adjoint\n  // calculation\n  void\n  Stack::initialize_gradients()\n  {\n    if (max_gradient_ > 0) {\n      if (n_allocated_gradients_ < max_gradient_) {\n\tif (gradient_) {\n\t  delete[] gradient_;\n\t}\n\tgradient_ = new Real[max_gradient_];\n\tn_allocated_gradients_ = max_gradient_;\n      }\n      for (uIndex i = 0; i < max_gradient_; i++) {\n\tgradient_[i] = 0.0;\n      }\n    }\n    gradients_initialized_ = true;\n  }\n#else\n  void\n  Stack::initialize_gradients()\n  {\n    gradient_.resize(max_gradient_+10, 0.0);\n      gradients_initialized_ = true;\n  }\n#endif\n\n  // Report information about the stack to the specified stream, or\n  // standard output if omitted; note that this is synonymous with\n  // sending the Stack object to a stream using the \"<<\" operator.\n  void\n  Stack::print_status(std::ostream& os) const\n  {\n    os << \"Automatic Differentiation Stack (address \" << this << \"):\\n\";\n    if ((!is_thread_unsafe_) && _stack_current_thread == this) {\n      os << \"   Currently attached - thread safe\\n\";\n    }\n    else if (is_thread_unsafe_ && _stack_current_thread_unsafe == this) {\n      os << \"   Currently attached - thread unsafe\\n\";\n    }\n    else {\n      os << \"   Currently detached\\n\";\n    }\n    os << \"   Recording status:\\n\";\n    if (is_recording_) {\n      os << \"      Recording is ON\\n\";  \n    }\n    else {\n      
os << \"      Recording is PAUSED\\n\";\n    }\n    // Account for the null statement at the start by subtracting one\n    os << \"      \" << n_statements()-1 << \" statements (\" \n       << n_allocated_statements() << \" allocated)\";\n    os << \" and \" << n_operations() << \" operations (\" \n       << n_allocated_operations() << \" allocated)\\n\";\n    os << \"      \" << n_gradients_registered() << \" gradients currently registered \";\n    os << \"and a total of \" << max_gradients() << \" needed (current index \"\n       << i_gradient() << \")\\n\";\n    if (gap_list_.empty()) {\n      os << \"      Gradient list has no gaps\\n\";\n    }\n    else {\n      os << \"      Gradient list has \" << gap_list_.size() << \" gaps (\";\n      print_gaps(os);\n      os << \")\\n\";\n    }\n    os << \"   Computation status:\\n\";\n    if (gradients_are_initialized()) {\n      os << \"      \" << max_gradients() << \" gradients assigned (\" \n\t << n_allocated_gradients() << \" allocated)\\n\";\n    }\n    else {\n      os << \"      0 gradients assigned (\" << n_allocated_gradients()\n\t << \" allocated)\\n\";\n    }\n    os << \"      Jacobian size: \" << n_dependents() << \"x\" << n_independents() << \"\\n\";\n    if (n_dependents() <= 10 && n_independents() <= 10) {\n      os << \"      Independent indices:\";\n      for (std::size_t i = 0; i < independent_index_.size(); ++i) {\n\tos << \" \" << independent_index_[i];\n      }\n      os << \"\\n      Dependent indices:  \";\n      for (std::size_t i = 0; i < dependent_index_.size(); ++i) {\n\tos << \" \" << dependent_index_[i];\n      }\n      os << \"\\n\";\n    }\n\n#ifdef _OPENMP\n    if (have_openmp_) {\n      if (openmp_manually_disabled_) {\n\tos << \"      Parallel Jacobian calculation manually disabled\\n\";\n      }\n      else {\n\tos << \"      Parallel Jacobian calculation can use up to \"\n\t   << omp_get_max_threads() << \" threads\\n\";\n\tos << \"      Each thread treats \" << 
ADEPT_MULTIPASS_SIZE \n\t   << \" (in)dependent variables\\n\";\n      }\n    }\n    else {\n#endif\n      os << \"      Parallel Jacobian calculation not available\\n\";\n#ifdef _OPENMP\n    }\n#endif\n  }\n} // End namespace adept\n\n"
  },
  {
    "path": "adept/StackStorageOrig.cpp",
    "content": "/* StackStorageOrig.cpp -- Original storage of stacks using STL containers\n\n    Copyright (C) 2014-2015 University of Reading\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   The Stack class inherits from a class providing the storage (and\n   interface to the storage) for the derivative statements that are\n   accumulated during the execution of an algorithm.  The derivative\n   statements are held in two stacks described by Hogan (2014): the\n   \"statement stack\" and the \"operation stack\".\n\n   This file provides one of the original storage engine, which used\n   std::vector to hold the two stacks. Note that these stacks are\n   contiguous in memory, which is not ideal for very large algorithms.\n\n*/\n\n#include <cstring>\n\n#include <adept/StackStorageOrig.h>\n\nnamespace adept {\n  namespace internal {\n\n    StackStorageOrig::~StackStorageOrig() {\n      if (statement_) {\n\tdelete[] statement_;\n      }\n      if (multiplier_) {\n\tdelete[] multiplier_;\n      }\n      if (index_) {\n\tdelete[] index_;\n      }\n    }\n\n\n    // Double the size of the operation stack, or grow it even more if\n    // the requested minimum number of extra entries (min) is greater\n    // than this would allow\n    void\n    StackStorageOrig::grow_operation_stack(uIndex min)\n    {\n      uIndex new_size = 2*n_allocated_operations_;\n      if (min > 0 && new_size < n_allocated_operations_+min) {\n\tnew_size += min;\n      }\n      Real* new_multiplier = new Real[new_size];\n      uIndex* new_index = new uIndex[new_size];\n      \n      std::memcpy(new_multiplier, multiplier_, n_operations_*sizeof(Real));\n      std::memcpy(new_index, index_, n_operations_*sizeof(uIndex));\n      \n      delete[] multiplier_;\n      delete[] index_;\n      \n      multiplier_ = new_multiplier;\n      index_ = new_index;\n      \n      n_allocated_operations_ = new_size;\n    }\n    \n    // ... 
likewise for the statement stack\n    void\n    StackStorageOrig::grow_statement_stack(uIndex min)\n    {\n      uIndex new_size = 2*n_allocated_statements_;\n      if (min > 0 && new_size < n_allocated_statements_+min) {\n\tnew_size += min;\n      }\n      Statement* new_statement = new Statement[new_size];\n      std::memcpy(new_statement, statement_,\n\t\t  n_statements_*sizeof(Statement));\n      delete[] statement_;\n      \n      statement_ = new_statement;\n      \n      n_allocated_statements_ = new_size;\n    }\n\n  }\n}\n"
  },
  {
    "path": "adept/Storage.cpp",
    "content": "/* Storage.cpp -- Global variables recording use of Storage objects\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <adept/Storage.h>\n\nnamespace adept {\n  namespace internal {\n    Index n_storage_objects_created_;\n    Index n_storage_objects_deleted_;\n  }\n}\n"
  },
  {
    "path": "adept/cppblas.cpp",
    "content": "/* cppblas.cpp -- C++ interface to BLAS functions\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   This file provides a C++ interface to selected Level-2 and -3 BLAS\n   functions in which the precision of the arguments (float versus\n   double) is inferred via overloading\n\n*/\n\n#include <adept/exception.h>\n#include <adept/cppblas.h>\n\n#ifdef HAVE_CONFIG_H\n#include \"config.h\"\n#endif\n\n#ifdef HAVE_BLAS\n\nextern \"C\" {\n  void sgemm_(const char* TransA, const char* TransB, const int* M,\n\t      const int* N, const int* K, const float* alpha,\n\t      const float* A, const int* lda, const float* B, const int* ldb,\n\t      const float* beta, const float* C, const int* ldc);\n  void dgemm_(const char* TransA, const char* TransB, const int* M,\n\t      const int* N, const int* K, const double* alpha,\n\t      const double* A, const int* lda, const double* B, const int* ldb,\n\t      const double* beta, const double* C, const int* ldc);\n  void sgemv_(const char* TransA, const int* M, const int* N, const float* alpha,\n\t      const float* A, const int* lda, const float* X, const int* incX,\n\t      const float* beta, const float* Y, const int* incY);\n  void dgemv_(const char* TransA, const int* M, const int* N, const double* alpha,\n\t      const double* A, const int* lda, const double* X, const int* incX,\n\t      const double* beta, const double* Y, const int* incY);\n  void ssymm_(const char* side, const char* uplo, const int* M, const int* N,\n\t      const float* alpha, const float* A, const int* lda, const float* B,\n\t      const int* ldb, const float* beta, float* C, const int* ldc);\n  void dsymm_(const char* side, const char* uplo, const int* M, const int* N,\n\t      const double* alpha, const double* A, const int* lda, const double* B,\n\t      const int* ldb, const double* beta, double* C, const 
int* ldc);\n  void ssymv_(const char* uplo, const int* N, const float* alpha, const float* A, \n\t      const int* lda, const float* X, const int* incX, const float* beta, \n\t      const float* Y, const int* incY);\n  void dsymv_(const char* uplo, const int* N, const double* alpha, const double* A, \n\t      const int* lda, const double* X, const int* incX, const double* beta, \n\t      const double* Y, const int* incY);\n  void sgbmv_(const char* TransA, const int* M, const int* N, const int* kl, \n\t      const int* ku, const float* alpha, const float* A, const int* lda,\n\t      const float* X, const int* incX, const float* beta, \n\t      const float* Y, const int* incY);\n  void dgbmv_(const char* TransA, const int* M, const int* N, const int* kl, \n\t      const int* ku, const double* alpha, const double* A, const int* lda,\n\t      const double* X, const int* incX, const double* beta, \n\t      const double* Y, const int* incY);\n}\n\nnamespace adept {\n\n  namespace internal {\n    \n    // Matrix-matrix multiplication for general dense matrices\n#define ADEPT_DEFINE_GEMM(T, FUNC, FUNC_COMPLEX)\t\t\\\n    void cppblas_gemm(BLAS_ORDER Order,\t\t\t\t\\\n\t\t      BLAS_TRANSPOSE TransA,\t\t\t\\\n\t\t      BLAS_TRANSPOSE TransB,\t\t\t\\\n\t\t      int M, int N,\t\t\t\t\\\n\t\t      int K, T alpha, const T *A,\t\t\\\n\t\t      int lda, const T *B, int ldb,\t\t\\\n\t\t      T beta, T *C, int ldc) {\t\t\t\\\n      if (Order == BlasColMajor) {\t\t\t\t\\\n        FUNC(&TransA, &TransB, &M, &N, &K, &alpha, A, &lda,\t\\\n\t     B, &ldb, &beta, C, &ldc);\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n      else {\t\t\t\t\t\t\t\\\n        FUNC(&TransB, &TransA, &N, &M, &K, &alpha, B, &ldb,\t\\\n\t     A, &lda, &beta, C, &ldc);\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n    }\n    ADEPT_DEFINE_GEMM(double, dgemm_, zgemm_)\n    ADEPT_DEFINE_GEMM(float,  sgemm_, cgemm_)\n#undef ADEPT_DEFINE_GEMM\n    \n    // Matrix-vector multiplication for a general dense matrix\n#define 
ADEPT_DEFINE_GEMV(T, FUNC, FUNC_COMPLEX)\t\t\\\n    void cppblas_gemv(const BLAS_ORDER Order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const T alpha, const T *A, const int lda,\t\\\n\t\t      const T *X, const int incX, const T beta,\t\\\n\t\t      T *Y, const int incY) {\t\t\t\\\n      if (Order == BlasColMajor) {\t\t\t\t\\\n        FUNC(&TransA, &M, &N, &alpha, A, &lda, X, &incX, \t\\\n\t     &beta, Y, &incY);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n      else {\t\t\t\t\t\t\t\\\n        BLAS_TRANSPOSE TransNew\t\t\t\t\t\\\n\t  = TransA == BlasTrans ? BlasNoTrans : BlasTrans;\t\\\n        FUNC(&TransNew, &N, &M, &alpha, A, &lda, X, &incX, \t\\\n\t     &beta, Y, &incY);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n    }\n    ADEPT_DEFINE_GEMV(double, dgemv_, zgemv_)\n    ADEPT_DEFINE_GEMV(float,  sgemv_, cgemv_)\n#undef ADEPT_DEFINE_GEMV\n    \n    // Matrix-matrix multiplication where matrix A is symmetric\n    // FIX! CHECK ROW MAJOR VERSION IS RIGHT\t\t\t\n#define ADEPT_DEFINE_SYMM(T, FUNC, FUNC_COMPLEX)\t\t\t\\\n    void cppblas_symm(const BLAS_ORDER Order,\t\t\t\t\\\n\t\t      const BLAS_SIDE Side,\t\t\t\t\\\n\t\t      const BLAS_UPLO Uplo,\t\t\t\t\\\n\t\t      const int M, const int N,\t\t\t\t\\\n\t\t      const T alpha, const T *A, const int lda,\t\t\\\n\t\t      const T *B, const int ldb, const T beta,\t\t\\\n\t\t      T *C, const int ldc) {\t\t\t\t\\\n      if (Order == BlasColMajor) {\t\t\t\t\t\\\n        FUNC(&Side, &Uplo, &M, &N, &alpha, A, &lda,\t\t\t\\\n\t     B, &ldb, &beta, C, &ldc);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      else {\t\t\t\t\t\t\t\t\\\n\tBLAS_SIDE SideNew = Side == BlasLeft  ? BlasRight : BlasLeft;\t\\\n\tBLAS_UPLO UploNew = Uplo == BlasUpper ? 
BlasLower : BlasUpper;  \\\n        FUNC(&SideNew, &UploNew, &N, &M, &alpha, A, &lda,\t\t\\\n\t     B, &ldb, &beta, C, &ldc);\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    }\n    ADEPT_DEFINE_SYMM(double, dsymm_, zsymm_)\n    ADEPT_DEFINE_SYMM(float,  ssymm_, csymm_)\n#undef ADEPT_DEFINE_SYMM\n    \n    // Matrix-vector multiplication where the matrix is symmetric\n#define ADEPT_DEFINE_SYMV(T, FUNC, FUNC_COMPLEX)\t\t\t\\\n    void cppblas_symv(const BLAS_ORDER Order,\t\t\t\t\\\n\t\t      const BLAS_UPLO Uplo,\t\t\t\t\\\n\t\t      const int N, const T alpha, const T *A,\t\t\\\n\t\t      const int lda, const T *X, const int incX,\t\\\n\t\t      const T beta, T *Y, const int incY) {\t\t\\\n      if (Order == BlasColMajor) {\t\t\t\t\t\\\n        FUNC(&Uplo, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      else {\t\t\t\t\t\t\t\t\\\n        BLAS_UPLO UploNew = Uplo == BlasUpper ? BlasLower : BlasUpper;  \\\n        FUNC(&UploNew, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n    }\n    ADEPT_DEFINE_SYMV(double, dsymv_, zsymv_)\n    ADEPT_DEFINE_SYMV(float,  ssymv_, csymv_)\n#undef ADEPT_DEFINE_SYMV\n    \n    // Matrix-vector multiplication for a general band matrix\n#define ADEPT_DEFINE_GBMV(T, FUNC, FUNC_COMPLEX)\t\t\\\n    void cppblas_gbmv(const BLAS_ORDER Order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const int KL, const int KU, const T alpha,\\\n\t\t      const T *A, const int lda, const T *X,\t\\\n\t\t      const int incX, const T beta, T *Y,\t\\\n\t\t      const int incY) {\t\t\t\t\\\n      if (Order == BlasColMajor) {\t\t\t\t\\\n        FUNC(&TransA, &M, &N, &KL, &KU, &alpha, A, &lda,\t\\\n\t     X, &incX, &beta, Y, &incY);\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n      else {\t\t\t\t\t\t\t\\\n\tBLAS_TRANSPOSE TransNew\t\t\t\t\t\\\n\t  = TransA == BlasTrans ? 
BlasNoTrans : BlasTrans;\t\\\n\tFUNC(&TransNew, &N, &M, &KU, &KL, &alpha, A, &lda,\t\\\n\t     X, &incX, &beta, Y, &incY);\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n    }\n    ADEPT_DEFINE_GBMV(double, dgbmv_, zgbmv_)\n    ADEPT_DEFINE_GBMV(float,  sgbmv_, cgbmv_)\n#undef ADEPT_DEFINE_GBMV\n  \n  } // End namespace internal\n  \n} // End namespace adept\n  \n\n#else // Don't have BLAS\n\n\nnamespace adept {\n\n  namespace internal {\n    \n    // Matrix-matrix multiplication for general dense matrices\n#define ADEPT_DEFINE_GEMM(T, FUNC, FUNC_COMPLEX)\t\t\\\n    void cppblas_gemm(BLAS_ORDER Order,\t\t\t\t\\\n\t\t      BLAS_TRANSPOSE TransA,\t\t\t\\\n\t\t      BLAS_TRANSPOSE TransB,\t\t\t\\\n\t\t      int M, int N,\t\t\t\t\\\n\t\t      int K, T alpha, const T *A,\t\t\\\n\t\t      int lda, const T *B, int ldb,\t\t\\\n\t\t      T beta, T *C, int ldc) {\t\t\t\\\n      throw feature_not_available(\"Cannot perform matrix-matrix multiplication because compiled without BLAS\"); \\\n    }\n    ADEPT_DEFINE_GEMM(double, dgemm_, zgemm_)\n    ADEPT_DEFINE_GEMM(float,  sgemm_, cgemm_)\n#undef ADEPT_DEFINE_GEMM\n    \n    // Matrix-vector multiplication for a general dense matrix\n#define ADEPT_DEFINE_GEMV(T, FUNC, FUNC_COMPLEX)\t\t\\\n    void cppblas_gemv(const BLAS_ORDER Order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const T alpha, const T *A, const int lda,\t\\\n\t\t      const T *X, const int incX, const T beta,\t\\\n\t\t      T *Y, const int incY) {\t\t\t\\\n      throw feature_not_available(\"Cannot perform matrix-vector multiplication because compiled without BLAS\"); \\\n    }\n    ADEPT_DEFINE_GEMV(double, dgemv_, zgemv_)\n    ADEPT_DEFINE_GEMV(float,  sgemv_, cgemv_)\n#undef ADEPT_DEFINE_GEMV\n    \n    // Matrix-matrix multiplication where matrix A is symmetric\n    // FIX! 
CHECK ROW MAJOR VERSION IS RIGHT\t\t\t\n#define ADEPT_DEFINE_SYMM(T, FUNC, FUNC_COMPLEX)\t\t\t\\\n    void cppblas_symm(const BLAS_ORDER Order,\t\t\t\t\\\n\t\t      const BLAS_SIDE Side,\t\t\t\t\\\n\t\t      const BLAS_UPLO Uplo,\t\t\t\t\\\n\t\t      const int M, const int N,\t\t\t\t\\\n\t\t      const T alpha, const T *A, const int lda,\t\t\\\n\t\t      const T *B, const int ldb, const T beta,\t\t\\\n\t\t      T *C, const int ldc) {\t\t\t\t\\\n      throw feature_not_available(\"Cannot perform symmetric matrix-matrix multiplication because compiled without BLAS\"); \\\n    }\n    ADEPT_DEFINE_SYMM(double, dsymm_, zsymm_)\n    ADEPT_DEFINE_SYMM(float,  ssymm_, csymm_)\n#undef ADEPT_DEFINE_SYMM\n    \n    // Matrix-vector multiplication where the matrix is symmetric\n#define ADEPT_DEFINE_SYMV(T, FUNC, FUNC_COMPLEX)\t\t\t\\\n    void cppblas_symv(const BLAS_ORDER Order,\t\t\t\t\\\n\t\t      const BLAS_UPLO Uplo,\t\t\t\t\\\n\t\t      const int N, const T alpha, const T *A,\t\t\\\n\t\t      const int lda, const T *X, const int incX,\t\\\n\t\t      const T beta, T *Y, const int incY) {\t\t\\\n      throw feature_not_available(\"Cannot perform symmetric matrix-vector multiplication because compiled without BLAS\"); \\\n    }\n    ADEPT_DEFINE_SYMV(double, dsymv_, zsymv_)\n    ADEPT_DEFINE_SYMV(float,  ssymv_, csymv_)\n#undef ADEPT_DEFINE_SYMV\n    \n    // Matrix-vector multiplication for a general band matrix\n#define ADEPT_DEFINE_GBMV(T, FUNC, FUNC_COMPLEX)\t\t\\\n    void cppblas_gbmv(const BLAS_ORDER Order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const int KL, const int KU, const T alpha,\\\n\t\t      const T *A, const int lda, const T *X,\t\\\n\t\t      const int incX, const T beta, T *Y,\t\\\n\t\t      const int incY) {\t\t\t\t\\\n      throw feature_not_available(\"Cannot perform band matrix-vector multiplication because compiled without BLAS\"); \\\n    }\n    ADEPT_DEFINE_GBMV(double, dgbmv_, 
zgbmv_)\n    ADEPT_DEFINE_GBMV(float,  sgbmv_, cgbmv_)\n#undef ADEPT_DEFINE_GBMV\n\n  }\n}\n\n#endif\n"
  },
  {
    "path": "adept/cpplapack.h",
    "content": "/* cpplapack.h -- C++ interface to LAPACK\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptCppLapack_H\n#define AdeptCppLapack_H 1                       \n\n#include <vector>\n#include <cstddef>\n\n#ifdef HAVE_CONFIG_H\n#include \"config.h\"\n#endif\n\n#ifdef HAVE_LAPACK\n\nextern \"C\" {\n  // External LAPACK Fortran functions\n  void sgetrf_(const int* m, const int* n, float*  a, const int* lda, int* ipiv, int* info);\n  void dgetrf_(const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info);\n  void sgetri_(const int* n, float* a, const int* lda, const int* ipiv, \n\t       float* work, const int* lwork, int* info);\n  void dgetri_(const int* n, double* a, const int* lda, const int* ipiv, \n\t       double* work, const int* lwork, int* info);\n  void ssytrf_(const char* uplo, const int* n, float* a, const int* lda, int* ipiv,\n\t       float* work, const int* lwork, int* info);\n  void dsytrf_(const char* uplo, const int* n, double* a, const int* lda, int* ipiv,\n\t       double* work, const int* lwork, int* info);\n  void ssytri_(const char* uplo, const int* n, float* a, const int* lda, \n\t       const int* ipiv, float* work, int* info);\n  void dsytri_(const char* uplo, const int* n, double* a, const int* lda, \n\t       const int* ipiv, double* work, int* info);\n  void ssysv_(const char* uplo, const int* n, const int* nrhs, float* a, const int* lda, \n\t      int* ipiv, float* b, const int* ldb, float* work, const int* lwork, int* info);\n  void dsysv_(const char* uplo, const int* n, const int* nrhs, double* a, const int* lda, \n\t      int* ipiv, double* b, const int* ldb, double* work, const int* lwork, int* info);\n  void sgesv_(const int* n, const int* nrhs, float* a, const int* lda, \n\t      int* ipiv, float* b, const int* ldb, int* info);\n  void dgesv_(const int* n, 
const int* nrhs, double* a, const int* lda, \n\t      int* ipiv, double* b, const int* ldb, int* info);\n}\n\nnamespace adept {\n\n  // Overloaded functions provide both single &\n  // double precision versions, and prevents the huge lapacke.h having\n  // to be included in all user code\n  namespace internal {\n    typedef int lapack_int;\n    // Factorize a general matrix\n    inline\n    int cpplapack_getrf(int n, float* a,  int lda, int* ipiv) {\n      int info;\n      sgetrf_(&n, &n, a, &lda, ipiv, &info);\n      return info;\n    }\n    inline\n    int cpplapack_getrf(int n, double* a, int lda, int* ipiv) {\n      int info;\n      dgetrf_(&n, &n, a, &lda, ipiv, &info);\n      return info;\n    }\n\n    // Invert a general matrix\n    inline\n    int cpplapack_getri(int n, float* a,  int lda, const int* ipiv) {\n      int info;\n      float work_query;\n      int lwork = -1;\n      // Find out how much work memory required\n      sgetri_(&n, a, &lda, ipiv, &work_query, &lwork, &info);\n      lwork = static_cast<int>(work_query);\n      std::vector<float> work(static_cast<std::size_t>(lwork));\n      // Do full calculation\n      sgetri_(&n, a, &lda, ipiv, &work[0], &lwork, &info);\n      return info;\n    }\n    inline\n    int cpplapack_getri(int n, double* a,  int lda, const int* ipiv) {\n      int info;\n      double work_query;\n      int lwork = -1;\n      // Find out how much work memory required\n      dgetri_(&n, a, &lda, ipiv, &work_query, &lwork, &info);\n      lwork = static_cast<int>(work_query);\n      std::vector<double> work(static_cast<std::size_t>(lwork));\n      // Do full calculation\n      dgetri_(&n, a, &lda, ipiv, &work[0], &lwork, &info);\n      return info;\n    }\n\n    // Factorize a symmetric matrix\n    inline\n    int cpplapack_sytrf(char uplo, int n, float* a, int lda, int* ipiv) {\n      int info;\n      float work_query;\n      int lwork = -1;\n      // Find out how much work memory required\n      ssytrf_(&uplo, &n, a, &lda, 
ipiv, &work_query, &lwork, &info);\n      lwork = static_cast<int>(work_query);\n      std::vector<float> work(static_cast<std::size_t>(lwork));\n      // Do full calculation\n      ssytrf_(&uplo, &n, a, &lda, ipiv, &work[0], &lwork, &info);\n      return info;\n    }\n    inline\n    int cpplapack_sytrf(char uplo, int n, double* a, int lda, int* ipiv) {\n      int info;\n      double work_query;\n      int lwork = -1;\n      // Find out how much work memory required\n      dsytrf_(&uplo, &n, a, &lda, ipiv, &work_query, &lwork, &info);\n      lwork = static_cast<int>(work_query);\n      std::vector<double> work(static_cast<std::size_t>(lwork));\n      // Do full calculation\n      dsytrf_(&uplo, &n, a, &lda, ipiv, &work[0], &lwork, &info);\n      return info;\n    }\n\n    // Invert a symmetric matrix\n    inline\n    int cpplapack_sytri(char uplo, int n, float* a, int lda, const int* ipiv) {\n      int info;\n      std::vector<float> work(n);\n      ssytri_(&uplo, &n, a, &lda, ipiv, &work[0], &info);\n      return info;\n    }\n    inline\n    int cpplapack_sytri(char uplo, int n, double* a, int lda, const int* ipiv) {\n      int info;\n      std::vector<double> work(n);\n      dsytri_(&uplo, &n, a, &lda, ipiv, &work[0], &info);\n      return info;\n    }\n\n    // Solve system of linear equations with general matrix\n    inline\n    int cpplapack_gesv(int n, int nrhs, float* a, int lda,\n\t\t       int* ipiv, float* b, int ldb) {\n      int info;\n      sgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, &info);\n      return info;\n    }\n    inline\n    int cpplapack_gesv(int n, int nrhs, double* a, int lda,\n\t\t       int* ipiv, double* b, int ldb) {\n      int info;\n      dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, &info);\n      return info;\n    }\n\n    // Solve system of linear equations with symmetric matrix\n    inline\n    int cpplapack_sysv(char uplo, int n, int nrhs, float* a, int lda, int* ipiv,\n\t\t       float* b, int ldb) {\n      int info;\n      float 
work_query;\n      int lwork = -1;\n      // Find out how much work memory required\n      ssysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work_query, &lwork, &info);\n      lwork = static_cast<int>(work_query);\n      std::vector<float> work(static_cast<std::size_t>(lwork));\n      // Do full calculation\n      ssysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work[0], &lwork, &info);\n      return info;\n    }\n    inline\n    int cpplapack_sysv(char uplo, int n, int nrhs, double* a, int lda, int* ipiv,\n\t\t       double* b, int ldb) {\n      int info;\n      double work_query;\n      int lwork = -1;\n      // Find out how much work memory required\n      dsysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work_query, &lwork, &info);\n      lwork = static_cast<int>(work_query);\n      std::vector<double> work(static_cast<std::size_t>(lwork));\n      // Do full calculation\n      dsysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work[0], &lwork, &info);\n      return info;\n    }\n\n  }\n}\n\n#endif\n\n#endif\n"
  },
  {
    "path": "adept/index.cpp",
    "content": "/* index.cpp -- Definitions of \"end\" and \"__\" for array indexing\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#include <adept/RangeIndex.h>\n\nnamespace adept {\n\n  ::adept::internal::EndIndex end;\n  ::adept::internal::AllIndex __;\n\n}\n"
  },
  {
    "path": "adept/inv.cpp",
    "content": "/* inv.cpp -- Invert matrices\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                             \n#include <vector>\n\n#include <adept/Array.h>\n#include <adept/SpecialMatrix.h>\n\n#ifndef AdeptSource_H\n#include \"cpplapack.h\"\n#endif\n\n#ifdef HAVE_LAPACK\n\nnamespace adept {\n\n  using namespace internal;\n  \n  // -------------------------------------------------------------------\n  // Invert general square matrix A\n  // -------------------------------------------------------------------\n  template <typename Type>\n  Array<2,Type,false> \n  inv(const Array<2,Type,false>& A) {\n    using internal::cpplapack_getrf;\n    using internal::cpplapack_getri;\n\n    if (A.dimension(0) != A.dimension(1)) {\n      throw invalid_operation(\"Only square matrices can be inverted\"\n\t\t\t      ADEPT_EXCEPTION_LOCATION);\n    }\n\n    Array<2,Type,false> A_;\n\n    // LAPACKE is more efficient with column-major input\n    A_.resize_column_major(A.dimensions());\n    A_ = A;\n\n    std::vector<lapack_int> ipiv(A_.dimension(0));\n\n    //    lapack_int status = LAPACKE_dgetrf(LAPACK_COL_MAJOR, A_.dimension(0), A_.dimension(1),\n    //\t\t\t\t       A_.data(), A_.offset(1), &ipiv[0]);\n\n    lapack_int status = cpplapack_getrf(A_.dimension(0),\n\t\t\t\t\tA_.data(), A_.offset(1), &ipiv[0]);\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to factorize matrix: LAPACK ?getrf returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n    }\n\n    //    status = LAPACKE_dgetri(LAPACK_COL_MAJOR, A_.dimension(0),\n    //\t\t\t    A_.data(), A_.offset(1), &ipiv[0]);\n    status = cpplapack_getri(A_.dimension(0),\n\t\t\t     A_.data(), A_.offset(1), &ipiv[0]);\n\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to invert matrix: LAPACK 
?getri returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n    }\n    return A_;\n  }\n\n\n\n  // -------------------------------------------------------------------\n  // Invert symmetric matrix A\n  // -------------------------------------------------------------------\n  template <typename Type, SymmMatrixOrientation Orient>\n  SpecialMatrix<Type,SymmEngine<Orient>,false> \n  inv(const SpecialMatrix<Type,SymmEngine<Orient>,false>& A) {\n    using internal::cpplapack_sytrf;\n    using internal::cpplapack_sytri;\n\n    SpecialMatrix<Type,SymmEngine<Orient>,false> A_;\n\n    A_.resize(A.dimension());\n    A_ = A;\n\n    // Treat symmetric matrix as column-major\n    char uplo;\n    if (Orient == ROW_LOWER_COL_UPPER) {\n      uplo = 'U';\n    }\n    else {\n      uplo = 'L';\n    }\n\n    std::vector<lapack_int> ipiv(A_.dimension(0));\n\n    //    lapack_int status = LAPACKE_dsytrf(LAPACK_COL_MAJOR, uplo, A_.dimension(),\n    //\t\t\t\t       A_.data(), A_.offset(), &ipiv[0]);\n    lapack_int status = cpplapack_sytrf(uplo, A_.dimension(),\n\t\t\t\t\tA_.data(), A_.offset(), &ipiv[0]);\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to factorize symmetric matrix: LAPACK ?sytrf returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n    }\n\n    //    status = LAPACKE_dsytri(LAPACK_COL_MAJOR, uplo, A_.dimension(),\n    //\t\t\t    A_.data(), A_.offset(), &ipiv[0]);\n    status = cpplapack_sytri(uplo, A_.dimension(),\n\t\t\t     A_.data(), A_.offset(), &ipiv[0]);\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to invert symmetric matrix: LAPACK ?sytri returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n    }\n    return A_;\n  }\n\n}\n\n#else // LAPACK not available\n    \nnamespace adept {\n\n  using namespace internal;\n\n  // 
-------------------------------------------------------------------\n  // Invert general square matrix A\n  // -------------------------------------------------------------------\n  template <typename Type>\n  Array<2,Type,false> \n  inv(const Array<2,Type,false>& A) {\n    throw feature_not_available(\"Cannot invert matrix because compiled without LAPACK\");\n  }\n\n  // -------------------------------------------------------------------\n  // Invert symmetric matrix A\n  // -------------------------------------------------------------------\n  template <typename Type, SymmMatrixOrientation Orient>\n  SpecialMatrix<Type,SymmEngine<Orient>,false> \n  inv(const SpecialMatrix<Type,SymmEngine<Orient>,false>& A) {\n    throw feature_not_available(\"Cannot invert matrix because compiled without LAPACK\");\n  }\n  \n}\n\n#endif\n\nnamespace adept {\n  // -------------------------------------------------------------------\n  // Explicit instantiations\n  // -------------------------------------------------------------------\n#define ADEPT_EXPLICIT_INV(TYPE)\t\t\t\t\t\\\n  template Array<2,TYPE,false>\t\t\t\t\t\t\\\n  inv(const Array<2,TYPE,false>& A);\t\t\t\t\t\\\n  template SpecialMatrix<TYPE,SymmEngine<ROW_LOWER_COL_UPPER>,false>\t\\\n  inv(const SpecialMatrix<TYPE,SymmEngine<ROW_LOWER_COL_UPPER>,false>&); \\\n  template SpecialMatrix<TYPE,SymmEngine<ROW_UPPER_COL_LOWER>,false>\t\\\n  inv(const SpecialMatrix<TYPE,SymmEngine<ROW_UPPER_COL_LOWER>,false>&)\n\n  ADEPT_EXPLICIT_INV(float);\n  ADEPT_EXPLICIT_INV(double);\n\n#undef ADEPT_EXPLICIT_INV\n  \n}\n\n\n"
  },
  {
    "path": "adept/jacobian.cpp",
    "content": "/* jacobian.cpp -- Computation of Jacobian matrix\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015-2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifdef _OPENMP\n#include <omp.h>\n#endif\n\n#include <adept_arrays.h>\n\nnamespace adept {\n\n  namespace internal {\n    static const int MULTIPASS_SIZE = ADEPT_REAL_PACKET_SIZE == 1 ? ADEPT_MULTIPASS_SIZE : ADEPT_REAL_PACKET_SIZE;\n  }\n\n  using namespace internal;\n\n  template <typename T>\n  T _check_long_double() {\n    // The user may have requested Real to be of type \"long double\" by\n    // specifying ADEPT_REAL_TYPE_SIZE=16. If the present system can\n    // only support double then sizeof(long double) will be 8, but\n    // Adept will not be emitting the best code for this, so it is\n    // probably better to fail forcing the user to specify\n    // ADEPT_REAL_TYPE_SIZE=8.\n    ADEPT_STATIC_ASSERT(ADEPT_REAL_TYPE_SIZE != 16 || ADEPT_REAL_TYPE_SIZE == sizeof(Real),\n\t\t\tCOMPILER_DOES_NOT_SUPPORT_16_BYTE_LONG_DOUBLE);\n    return 1;\n  }\n\n#if ADEPT_REAL_PACKET_SIZE > 1\n  void\n  Stack::jacobian_forward_kernel(Real* __restrict gradient_multipass_b) const\n  {\n\n    // Loop forward through the derivative statements\n    for (uIndex ist = 1; ist < n_statements_; ist++) {\n      const Statement& statement = statement_[ist];\n      // We copy the LHS to \"a\" in case it appears on the RHS in any\n      // of the following statements\n      Packet<Real> a; // Zeroed automatically\n      // Loop through operations\n      for (uIndex iop = statement_[ist-1].end_plus_one;\n\t   iop < statement.end_plus_one; iop++) {\n\tPacket<Real> g(gradient_multipass_b+index_[iop]*MULTIPASS_SIZE);\n\tPacket<Real> m(multiplier_[iop]);\n\ta += m * g;\n      }\n      // Copy the results\n      a.put(gradient_multipass_b+statement.index*MULTIPASS_SIZE);\n    } // End of loop 
over statements\n  }    \n#else\n  void\n  Stack::jacobian_forward_kernel(Real* __restrict gradient_multipass_b) const\n  {\n\n    // Loop forward through the derivative statements\n    for (uIndex ist = 1; ist < n_statements_; ist++) {\n      const Statement& statement = statement_[ist];\n      // We copy the LHS to \"a\" in case it appears on the RHS in any\n      // of the following statements\n      Block<MULTIPASS_SIZE,Real> a; // Zeroed automatically\n      // Loop through operations\n      for (uIndex iop = statement_[ist-1].end_plus_one;\n\t   iop < statement.end_plus_one; iop++) {\n\tfor (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t  a[i] += multiplier_[iop]*gradient_multipass_b[index_[iop]*MULTIPASS_SIZE+i];\n\t}\n      }\n      // Copy the results\n      for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\tgradient_multipass_b[statement.index*MULTIPASS_SIZE+i] = a[i];\n      }\n    } // End of loop over statements\n  }    \n#endif\n\n  void\n  Stack::jacobian_forward_kernel_extra(Real* __restrict gradient_multipass_b,\n\t\t\t\t       uIndex n_extra) const\n  {\n\n    // Loop forward through the derivative statements\n    for (uIndex ist = 1; ist < n_statements_; ist++) {\n      const Statement& statement = statement_[ist];\n      // We copy the LHS to \"a\" in case it appears on the RHS in any\n      // of the following statements\n      Block<MULTIPASS_SIZE,Real> a; // Zeroed automatically\n      // Loop through operations\n      for (uIndex iop = statement_[ist-1].end_plus_one;\n\t   iop < statement.end_plus_one; iop++) {\n\tfor (uIndex i = 0; i < n_extra; i++) {\n\t  a[i] += multiplier_[iop]*gradient_multipass_b[index_[iop]*MULTIPASS_SIZE+i];\n\t}\n      }\n      // Copy the results\n      for (uIndex i = 0; i < n_extra; i++) {\n\tgradient_multipass_b[statement.index*MULTIPASS_SIZE+i] = a[i];\n      }\n    } // End of loop over statements\n  }    \n\n\n\n  // Compute the Jacobian matrix, parallelized using OpenMP. 
Normally\n  // the user would call the jacobian or jacobian_forward functions,\n  // and the OpenMP version would only be called if OpenMP is\n  // available and the Jacobian matrix is large enough for\n  // parallelization to be worthwhile.  Note that jacobian_out must be\n  // allocated to be at least of size m*n, where m is the number of\n  // dependent variables and n is the number of independents. The\n  // independents and dependents must have already been identified\n  // with the functions \"independent\" and \"dependent\", otherwise this\n  // function will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. The\n  // offsets in memory of the two dimensions are provided by\n  // dep_offset and indep_offset. This is implemented using a forward\n  // pass, appropriate for m>=n.\n  void\n  Stack::jacobian_forward_openmp(Real* jacobian_out,\n\t\t\t\t Index dep_offset, Index indep_offset) const\n  {\n\n    // Number of blocks to cycle through, including a possible last\n    // block containing fewer than MULTIPASS_SIZE variables\n    int n_block = (n_independent() + MULTIPASS_SIZE - 1)\n      / MULTIPASS_SIZE;\n    uIndex n_extra = n_independent() % MULTIPASS_SIZE;\n    \n#pragma omp parallel\n    {\n      //      std::vector<Block<MULTIPASS_SIZE,Real> > \n      //\tgradient_multipass_b(max_gradient_);\n      uIndex gradient_multipass_size = max_gradient_*MULTIPASS_SIZE;\n      Real* __restrict gradient_multipass_b \n\t= alloc_aligned<Real>(gradient_multipass_size);\n      \n#pragma omp for schedule(static)\n      for (int iblock = 0; iblock < n_block; iblock++) {\n\t// Set the index to the dependent variables for this block\n\tuIndex i_independent =  MULTIPASS_SIZE * iblock;\n\t\n\tuIndex block_size = MULTIPASS_SIZE;\n\t// If this is the last iteration and the number of extra\n\t// elements is non-zero, then set the block size to the number\n\t// of extra elements. 
If the number of extra elements is zero,\n\t// then the number of independent variables is exactly divisible\n\t// by MULTIPASS_SIZE, so the last iteration will be the\n\t// same as all the rest.\n\tif (iblock == n_block-1 && n_extra > 0) {\n\t  block_size = n_extra;\n\t}\n\t\n\t// Set the initial gradients all to zero\n\tfor (uIndex i = 0; i < gradient_multipass_size; i++) {\n\t  gradient_multipass_b[i] = 0.0;\n\t}\n\t// Each seed vector has one non-zero entry of 1.0\n\tfor (uIndex i = 0; i < block_size; i++) {\n\t  gradient_multipass_b[independent_index_[i_independent+i]*MULTIPASS_SIZE+i] = 1.0;\n\t}\n\n\tjacobian_forward_kernel(gradient_multipass_b);\n\n\t// Copy the gradients corresponding to the dependent variables\n\t// into the Jacobian matrix\n\tif (indep_offset == 1) {\n\t  for (uIndex idep = 0; idep < n_dependent(); idep++) {\n\t    for (uIndex i = 0; i < block_size; i++) {\n\t      jacobian_out[idep*dep_offset+i_independent+i]\n\t\t= gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];\n\t    }\n\t  }\n\t}\n\telse {\n\t  for (uIndex idep = 0; idep < n_dependent(); idep++) {\n\t    for (uIndex i = 0; i < block_size; i++) {\n\t      jacobian_out[(i_independent+i)*indep_offset+idep*dep_offset]\n\t\t= gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];\n\t    }\n\t  }\n\t}\n      } // End of loop over blocks\n      free_aligned(gradient_multipass_b);\n    } // End of parallel section\n  } // End of jacobian function\n\n\n  // Compute the Jacobian matrix; note that jacobian_out must be\n  // allocated to be of size m*n, where m is the number of dependent\n  // variables and n is the number of independents. The independents\n  // and dependents must have already been identified with the\n  // functions \"independent\" and \"dependent\", otherwise this function\n  // will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. 
This is\n  // implemented using a forward pass, appropriate for m>=n.\n  void\n  Stack::jacobian_forward(Real* jacobian_out,\n\t\t\t  Index dep_offset, Index indep_offset) const\n  {\n    if (independent_index_.empty() || dependent_index_.empty()) {\n      throw(dependents_or_independents_not_identified());\n    }\n\n    // If either of the offsets are zero, set them to the size of the\n    // other dimension, which assumes that the full Jacobian matrix is\n    // contiguous in memory.\n    if (dep_offset <= 0) {\n      dep_offset = n_independent();\n    }\n    if (indep_offset <= 0) {\n      indep_offset = n_dependent();\n    }\n\n#ifdef _OPENMP\n    if (have_openmp_ \n\t&& !openmp_manually_disabled_\n\t&& n_independent() > MULTIPASS_SIZE\n\t&& omp_get_max_threads() > 1) {\n      // Call the parallel version\n      jacobian_forward_openmp(jacobian_out, dep_offset, indep_offset);\n      return;\n    }\n#endif\n\n    // For optimization reasons, we process a block of\n    // MULTIPASS_SIZE columns of the Jacobian at once; calculate\n    // how many blocks are needed and how many extras will remain\n    uIndex n_block = n_independent() / MULTIPASS_SIZE;\n    uIndex n_extra = n_independent() % MULTIPASS_SIZE;\n\n    ///gradient_multipass_.resize(max_gradient_);\n    uIndex gradient_multipass_size = max_gradient_*MULTIPASS_SIZE;\n    Real* __restrict gradient_multipass_b \n      = alloc_aligned<Real>(gradient_multipass_size);\n\n    // Loop over blocks of MULTIPASS_SIZE columns\n    for (uIndex iblock = 0; iblock < n_block; iblock++) {\n      // Set the index to the dependent variables for this block\n      uIndex i_independent =  MULTIPASS_SIZE * iblock;\n\n      // Set the initial gradients all to zero\n      ///zero_gradient_multipass();\n      for (uIndex i = 0; i < gradient_multipass_size; i++) {\n\tgradient_multipass_b[i] = 0.0;\n      }\n\n      // Each seed vector has one non-zero entry of 1.0\n      for (uIndex i = 0; i < MULTIPASS_SIZE; i++) 
{\n\tgradient_multipass_b[independent_index_[i_independent+i]*MULTIPASS_SIZE+i] = 1.0;\n      }\n\n      jacobian_forward_kernel(gradient_multipass_b);\n\n      // Copy the gradients corresponding to the dependent variables\n      // into the Jacobian matrix\n      if (indep_offset == 1) {\n\tfor (uIndex idep = 0; idep < n_dependent(); idep++) {\n\t  for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t    jacobian_out[idep*dep_offset+i_independent+i]\n\t      = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];\n\t  }\n\t}\n      }\n      else {\n\tfor (uIndex idep = 0; idep < n_dependent(); idep++) {\n\t  for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t    jacobian_out[(i_independent+i)*indep_offset+idep*dep_offset] \n\t      = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];\n\t  }\n\t}\n      }\n    } // End of loop over blocks\n    \n    // Now do the same but for the remaining few columns in the matrix\n    if (n_extra > 0) {\n      uIndex i_independent =  MULTIPASS_SIZE * n_block;\n      ///zero_gradient_multipass();\n      for (uIndex i = 0; i < gradient_multipass_size; i++) {\n\tgradient_multipass_b[i] = 0.0;\n      }\n\n      for (uIndex i = 0; i < n_extra; i++) {\n\tgradient_multipass_b[independent_index_[i_independent+i]*MULTIPASS_SIZE+i] = 1.0;\n      }\n\n      jacobian_forward_kernel_extra(gradient_multipass_b, n_extra);\n\n      if (indep_offset == 1) {\n\tfor (uIndex idep = 0; idep < n_dependent(); idep++) {\n\t  for (uIndex i = 0; i < n_extra; i++) {\n\t    jacobian_out[idep*dep_offset+i_independent+i]\n\t      = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];\n\t  }\n\t}\n      }\n      else {\n\tfor (uIndex idep = 0; idep < n_dependent(); idep++) {\n\t  for (uIndex i = 0; i < n_extra; i++) {\n\t    jacobian_out[(i_independent+i)*indep_offset+idep*dep_offset] \n\t      = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];\n\t  }\n\t}\n      }\n    }\n\n    free_aligned(gradient_multipass_b);\n  
}\n\n\n  // Compute the Jacobian matrix, parallelized using OpenMP.  Normally\n  // the user would call the jacobian or jacobian_reverse functions,\n  // and the OpenMP version would only be called if OpenMP is\n  // available and the Jacobian matrix is large enough for\n  // parallelization to be worthwhile.  Note that jacobian_out must be\n  // allocated to be at least of size m*n, where m is the number of\n  // dependent variables and n is the number of independents. The\n  // independents and dependents must have already been identified\n  // with the functions \"independent\" and \"dependent\", otherwise this\n  // function will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. The\n  // offsets in memory of the two dimensions are provided by\n  // dep_offset and indep_offset.  This is implemented using a reverse\n  // pass, appropriate for m<n.\n  void\n  Stack::jacobian_reverse_openmp(Real* jacobian_out,\n\t\t\t\t Index dep_offset, Index indep_offset) const\n  {\n\n    // Number of blocks to cycle through, including a possible last\n    // block containing fewer than MULTIPASS_SIZE variables\n    int n_block = (n_dependent() + MULTIPASS_SIZE - 1)\n      / MULTIPASS_SIZE;\n    uIndex n_extra = n_dependent() % MULTIPASS_SIZE;\n    \n    // Inside the OpenMP loop, the \"this\" pointer may be NULL if the\n    // adept::Stack pointer is declared as thread-local and if the\n    // OpenMP memory model uses thread-local storage for private\n    // data. 
If this is the case then local pointers to or copies of\n    // the following members of the adept::Stack object may need to be\n    // made: dependent_index_ n_statements_ statement_ multiplier_\n    // index_ independent_index_ n_dependent() n_independent().\n    // Limited testing implies this is OK though.\n\n#pragma omp parallel\n    {\n      std::vector<Block<MULTIPASS_SIZE,Real> > \n\tgradient_multipass_b(max_gradient_);\n      \n#pragma omp for schedule(static)\n      for (int iblock = 0; iblock < n_block; iblock++) {\n\t// Set the index to the dependent variables for this block\n\tuIndex i_dependent =  MULTIPASS_SIZE * iblock;\n\t\n\tuIndex block_size = MULTIPASS_SIZE;\n\t// If this is the last iteration and the number of extra\n\t// elements is non-zero, then set the block size to the number\n\t// of extra elements. If the number of extra elements is zero,\n\t// then the number of independent variables is exactly divisible\n\t// by MULTIPASS_SIZE, so the last iteration will be the\n\t// same as all the rest.\n\tif (iblock == n_block-1 && n_extra > 0) {\n\t  block_size = n_extra;\n\t}\n\n\t// Set the initial gradients all to zero\n\tfor (std::size_t i = 0; i < gradient_multipass_b.size(); i++) {\n\t  gradient_multipass_b[i].zero();\n\t}\n\t// Each seed vector has one non-zero entry of 1.0\n\tfor (uIndex i = 0; i < block_size; i++) {\n\t  gradient_multipass_b[dependent_index_[i_dependent+i]][i] = 1.0;\n\t}\n\n\t// Loop backward through the derivative statements\n\tfor (uIndex ist = n_statements_-1; ist > 0; ist--) {\n\t  const Statement& statement = statement_[ist];\n\t  // We copy the RHS to \"a\" in case it appears on the LHS in any\n\t  // of the following statements\n\t  Real a[MULTIPASS_SIZE];\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t  // For large blocks, we only process the ones where a[i] is\n\t  // non-zero\n\t  uIndex i_non_zero[MULTIPASS_SIZE];\n#endif\n\t  uIndex n_non_zero = 0;\n\t  for (uIndex i = 0; i < block_size; i++) {\n\t    
a[i] = gradient_multipass_b[statement.index][i];\n\t    gradient_multipass_b[statement.index][i] = 0.0;\n\t    if (a[i] != 0.0) {\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t      i_non_zero[n_non_zero++] = i;\n#else\n\t      n_non_zero = 1;\n#endif\n\t    }\n\t  }\n\n\t  // Only do anything for this statement if any of the a values\n\t  // are non-zero\n\t  if (n_non_zero) {\n\t    // Loop through the operations\n\t    for (uIndex iop = statement_[ist-1].end_plus_one;\n\t\t iop < statement.end_plus_one; iop++) {\n\t      // Try to minimize pointer dereferencing by making local\n\t      // copies\n\t      Real multiplier = multiplier_[iop];\n\t      Real* __restrict gradient_multipass \n\t\t= &(gradient_multipass_b[index_[iop]][0]);\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t      // For large blocks, loop over only the indices\n\t      // corresponding to non-zero a\n\t      for (uIndex i = 0; i < n_non_zero; i++) {\n\t\tgradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]];\n\t      }\n#else\n\t      // For small blocks, do all indices\n\t      for (uIndex i = 0; i < block_size; i++) {\n\t      //\t      for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t\tgradient_multipass[i] += multiplier*a[i];\n\t      }\n#endif\n\t    }\n\t  }\n\t} // End of loop over statement\n\t// Copy the gradients corresponding to the independent\n\t// variables into the Jacobian matrix\n\tif (dep_offset == 1) {\n\t  for (uIndex iindep = 0; iindep < n_independent(); iindep++) {\n\t    for (uIndex i = 0; i < block_size; i++) {\n\t      jacobian_out[iindep*indep_offset+i_dependent+i] \n\t\t= gradient_multipass_b[independent_index_[iindep]][i];\n\t    }\n\t  }\n\t}\n\telse {\n\t  for (uIndex iindep = 0; iindep < n_independent(); iindep++) {\n\t    for (uIndex i = 0; i < block_size; i++) {\n\t      jacobian_out[iindep*indep_offset+(i_dependent+i)*dep_offset] \n\t\t= gradient_multipass_b[independent_index_[iindep]][i];\n\t    }\n\t  }\n\t}\n      } // End of loop over 
blocks\n    } // end #pragma omp parallel\n  } // end jacobian_reverse_openmp\n\n\n  // Compute the Jacobian matrix; note that jacobian_out must be\n  // allocated to be of size m*n, where m is the number of dependent\n  // variables and n is the number of independents. The independents\n  // and dependents must have already been identified with the\n  // functions \"independent\" and \"dependent\", otherwise this function\n  // will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED.  This is\n  // implemented using a reverse pass, appropriate for m<n.\n  void\n  Stack::jacobian_reverse(Real* jacobian_out,\n\t\t\t  Index dep_offset, Index indep_offset) const\n  {\n    if (independent_index_.empty() || dependent_index_.empty()) {\n      throw(dependents_or_independents_not_identified());\n    }\n\n    // If either of the offsets are zero, set them to the size of the\n    // other dimension, which assumes that the full Jacobian matrix is\n    // contiguous in memory.\n    if (dep_offset <= 0) {\n      dep_offset = n_independent();\n    }\n    if (indep_offset <= 0) {\n      indep_offset = n_dependent();\n    }\n\n#ifdef _OPENMP\n    if (have_openmp_ \n\t&& !openmp_manually_disabled_\n\t&& n_dependent() > MULTIPASS_SIZE\n\t&& omp_get_max_threads() > 1) {\n      // Call the parallel version\n      jacobian_reverse_openmp(jacobian_out,\n\t\t\t      dep_offset, indep_offset);\n      return;\n    }\n#endif\n\n    //    gradient_multipass_.resize(max_gradient_);\n    std::vector<Block<MULTIPASS_SIZE,Real> > \n      gradient_multipass_b(max_gradient_);\n\n    // For optimization reasons, we process a block of\n    // MULTIPASS_SIZE rows of the Jacobian at once; calculate\n    // how many blocks are needed and how many extras will remain\n    uIndex n_block = n_dependent() / MULTIPASS_SIZE;\n    uIndex n_extra = n_dependent() % MULTIPASS_SIZE;\n    uIndex i_dependent = 0; // uIndex of first row in the block we are\n\t\t\t    // currently computing\n    // Loop over the of 
MULTIPASS_SIZE rows\n    for (uIndex iblock = 0; iblock < n_block; iblock++) {\n      // Set the initial gradients all to zero\n      //      zero_gradient_multipass();\n      for (std::size_t i = 0; i < gradient_multipass_b.size(); i++) {\n\tgradient_multipass_b[i].zero();\n      }\n\n      // Each seed vector has one non-zero entry of 1.0\n      for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\tgradient_multipass_b[dependent_index_[i_dependent+i]][i] = 1.0;\n      }\n      // Loop backward through the derivative statements\n      for (uIndex ist = n_statements_-1; ist > 0; ist--) {\n\tconst Statement& statement = statement_[ist];\n\t// We copy the RHS to \"a\" in case it appears on the LHS in any\n\t// of the following statements\n\tReal a[MULTIPASS_SIZE];\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t// For large blocks, we only process the ones where a[i] is\n\t// non-zero\n\tuIndex i_non_zero[MULTIPASS_SIZE];\n#endif\n\tuIndex n_non_zero = 0;\n\tfor (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t  a[i] = gradient_multipass_b[statement.index][i];\n\t  gradient_multipass_b[statement.index][i] = 0.0;\n\t  if (a[i] != 0.0) {\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t    i_non_zero[n_non_zero++] = i;\n#else\n\t    n_non_zero = 1;\n#endif\n\t  }\n\t}\n\t// Only do anything for this statement if any of the a values\n\t// are non-zero\n\tif (n_non_zero) {\n\t  // Loop through the operations\n\t  for (uIndex iop = statement_[ist-1].end_plus_one;\n\t       iop < statement.end_plus_one; iop++) {\n\t    // Try to minimize pointer dereferencing by making local\n\t    // copies\n\t    Real multiplier = multiplier_[iop];\n\t    Real* __restrict gradient_multipass \n\t      = &(gradient_multipass_b[index_[iop]][0]);\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t    // For large blocks, loop over only the indices\n\t    // corresponding to non-zero a\n\t    for (uIndex i = 0; i < n_non_zero; i++) {\n\t      gradient_multipass[i_non_zero[i]] += 
multiplier*a[i_non_zero[i]];\n\t    }\n#else\n\t    // For small blocks, do all indices\n\t    for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t      gradient_multipass[i] += multiplier*a[i];\n\t    }\n#endif\n\t  }\n\t}\n      } // End of loop over statement\n      // Copy the gradients corresponding to the independent variables\n      // into the Jacobian matrix\n      if (dep_offset == 1) {\n\tfor (uIndex iindep = 0; iindep < n_independent(); iindep++) {\n\t  for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t    jacobian_out[iindep*indep_offset+i_dependent+i] \n\t      = gradient_multipass_b[independent_index_[iindep]][i];\n\t  }\n\t}\n      }\n      else {\n\tfor (uIndex iindep = 0; iindep < n_independent(); iindep++) {\n\t  for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {\n\t    jacobian_out[iindep*indep_offset+(i_dependent+i)*dep_offset] \n\t      = gradient_multipass_b[independent_index_[iindep]][i];\n\t  }\n\t}\n      }\n      i_dependent += MULTIPASS_SIZE;\n    } // End of loop over blocks\n    \n    // Now do the same but for the remaining few rows in the matrix\n    if (n_extra > 0) {\n      for (std::size_t i = 0; i < gradient_multipass_b.size(); i++) {\n\tgradient_multipass_b[i].zero();\n      }\n      //      zero_gradient_multipass();\n      for (uIndex i = 0; i < n_extra; i++) {\n\tgradient_multipass_b[dependent_index_[i_dependent+i]][i] = 1.0;\n      }\n      for (uIndex ist = n_statements_-1; ist > 0; ist--) {\n\tconst Statement& statement = statement_[ist];\n\tReal a[MULTIPASS_SIZE];\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\tuIndex i_non_zero[MULTIPASS_SIZE];\n#endif\n\tuIndex n_non_zero = 0;\n\tfor (uIndex i = 0; i < n_extra; i++) {\n\t  a[i] = gradient_multipass_b[statement.index][i];\n\t  gradient_multipass_b[statement.index][i] = 0.0;\n\t  if (a[i] != 0.0) {\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t    i_non_zero[n_non_zero++] = i;\n#else\n\t    n_non_zero = 1;\n#endif\n\t  }\n\t}\n\tif (n_non_zero) {\n\t  for (uIndex iop = 
statement_[ist-1].end_plus_one;\n\t       iop < statement.end_plus_one; iop++) {\n\t    Real multiplier = multiplier_[iop];\n\t    Real* __restrict gradient_multipass \n\t      = &(gradient_multipass_b[index_[iop]][0]);\n#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK\n\t    for (uIndex i = 0; i < n_non_zero; i++) {\n\t      gradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]];\n\t    }\n#else\n\t    for (uIndex i = 0; i < n_extra; i++) {\n\t      gradient_multipass[i] += multiplier*a[i];\n\t    }\n#endif\n\t  }\n\t}\n      }\n      if (dep_offset == 1) {\n\tfor (uIndex iindep = 0; iindep < n_independent(); iindep++) {\n\t  for (uIndex i = 0; i < n_extra; i++) {\n\t    jacobian_out[iindep*indep_offset+i_dependent+i] \n\t      = gradient_multipass_b[independent_index_[iindep]][i];\n\t  }\n\t}\n      }\n      else {\n\tfor (uIndex iindep = 0; iindep < n_independent(); iindep++) {\n\t  for (uIndex i = 0; i < n_extra; i++) {\n\t    jacobian_out[iindep*indep_offset+(i_dependent+i)*dep_offset] \n\t      = gradient_multipass_b[independent_index_[iindep]][i];\n\t  }\n\t}\n      }\n    }\n  }\n  \n  // Return the Jacobian matrix in the matrix \"jac\", using the forward\n  // or reverse method depending which would be faster\n  void Stack::jacobian(Array<2,Real,false> jac) const {\n    if (jac.dimension(0) != n_dependent()\n\t|| jac.dimension(1) != n_independent()) {\n      throw size_mismatch(\"Jacobian matrix has wrong size\");\n    }\n    if (n_independent() <= n_dependent()) {\n      jacobian_forward(jac.data(), jac.offset(0), jac.offset(1));\n    }\n    else {\n      jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1));\n    }\n  }\n\n  // Return the Jacobian matrix in the matrix \"jac\", explicitly\n  // specifying whether to use the forward or reverse method\n  void Stack::jacobian_forward(Array<2,Real,false> jac) const {\n    if (jac.dimension(0) != n_dependent()\n\t|| jac.dimension(1) != n_independent()) {\n      throw size_mismatch(\"Jacobian 
matrix has wrong size\");\n    }\n    jacobian_forward(jac.data(), jac.offset(0), jac.offset(1));\n  }\n\n  void Stack::jacobian_reverse(Array<2,Real,false> jac) const {\n    if (jac.dimension(0) != n_dependent()\n\t|| jac.dimension(1) != n_independent()) {\n      throw size_mismatch(\"Jacobian matrix has wrong size\");\n    }\n    jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1));\n  }\n\n  // Return the Jacobian matrix using the forward or reverse method\n  // depending which would be faster\n  Array<2,Real,false> Stack::jacobian() const {\n    Array<2,Real,false> jac(n_dependent(), n_independent());\n    if (n_independent() <= n_dependent()) {\n      jacobian_forward(jac.data(), jac.offset(0), jac.offset(1));\n    }\n    else {\n      jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1));\n    }\n    return jac;\n  }\n\n  // Return the Jacobian matrix, explicitly specifying whether to use\n  // the forward or reverse method\n  Array<2,Real,false> Stack::jacobian_forward() const {\n    Array<2,Real,false> jac(n_dependent(), n_independent());\n    jacobian_forward(jac.data(), jac.offset(0), jac.offset(1));\n    return jac;\n  }\n\n  Array<2,Real,false> Stack::jacobian_reverse() const {\n    Array<2,Real,false> jac(n_dependent(), n_independent());\n    jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1));\n    return jac;\n  }\n\n} // End namespace adept\n"
  },
  {
    "path": "adept/line_search.cpp",
    "content": "/* line_search.cpp -- Approximate minimization of function along a line\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <limits>\n#include <cmath>\n#include <adept/Minimizer.h>\n\nnamespace adept {\n\n  // Compute the cost function \"cf\" and gradient vector \"gradient\",\n  // along with the scalar gradient \"grad\" in the search direction\n  // \"direction\" (normalized with \"dir_scaling\"), from the state\n  // vector \"x\" plus a step \"step_size\" in the search direction. If\n  // the resulting cost function and gradient satisfy the Wolfe\n  // conditions for sufficient convergence, copy the new state vector\n  // to \"x\" and the step size to \"final_step_size\", and return\n  // MINIMIZER_STATUS_SUCCESS.  Otherwise, return\n  // MINIMIZER_STATUS_NOT_YET_CONVERGED.  Error conditions\n  // MINIMIZER_STATUS_INVALID_COST_FUNCTION and\n  // MINIMIZER_STATUS_INVALID_GRADIENT are also possible.\n  MinimizerStatus\n  Minimizer::line_search_gradient_check(\n\tOptimizable& optimizable, // Object defining function to be minimized\n\tVector x, // Initial and returned state vector\n\tconst Vector& direction, // Un-normalized search direction\n\tVector test_x, // Test state vector (working memory)\n\tReal& final_step_size, // Returned step size if converged\n\tVector gradient, // Gradient vector\n\tint& state_up_to_date, // Is state up-to-date?\n\tReal step_size, // Candidate step size\n\tReal grad0, // Gradient in direction at start of line search\n\tReal dir_scaling, // Scaling of direction vector\n\tReal& cf, // Returned cost function\n\tReal& grad, // Returned gradient in direction\n\tReal curvature_coeff) // Factor by which gradient should reduce (0-1)\n  {\n    test_x = x + (step_size * dir_scaling) * direction;\n    cf = optimizable.calc_cost_function_gradient(test_x, gradient);\n    ++n_samples_;\n    
state_up_to_date = -1;\n\n    // Check cost function and gradient are finite\n    if (!std::isfinite(cf)) {\n      return MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n    }\n    else if (any(!isfinite(gradient))) {\n      return MINIMIZER_STATUS_INVALID_GRADIENT;\n    }\n\n    // Calculate gradient in search direction\n    grad = dot_product(direction, gradient) * dir_scaling;\n\n    // Check Wolfe conditions\n    if (cf <= cost_function_ + armijo_coeff_*step_size*grad0 // Armijo condition\n\t&& std::fabs(grad) <= -curvature_coeff*grad0) { // Curvature condition\n      x = test_x;\n      final_step_size = step_size;\n      cost_function_ = cf;\n      state_up_to_date = 1;\n      return MINIMIZER_STATUS_SUCCESS;\n    }\n    else {\n      return MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    }\n  }\n\n  // Perform line search starting at state vector \"x\" with gradient\n  // vector \"gradient\", and initial step \"step_size\" in un-normalized\n  // direction \"direction\". Successful minimization of the function\n  // (according to Wolfe conditions) will lead to\n  // MINIMIZER_STATUS_SUCCESS being returned, the new state stored in\n  // \"x\", and if state_up_to_date >= 1 then the gradient stored in\n  // \"gradient\". Other possible return values are\n  // MINIMIZER_STATUS_FAILED_TO_CONVERGE and\n  // MINIMIZER_STATUS_DIRECTION_UPHILL if the initial direction points\n  // uphill, or MINIMIZER_STATUS_INVALID_COST_FUNCTION,\n  // MINIMIZER_STATUS_INVALID_GRADIENT or\n  // MINIMIZER_STATUS_BOUND_REACHED. 
First the minimum is bracketed,\n  // then a cubic polynomial is fitted to the values and gradients of\n  // the function at the two points in order to select the next test\n  // point.\n  MinimizerStatus\n  Minimizer::line_search(\n\t Optimizable& optimizable,  // Object defining function to be minimized\n\t Vector x, // Initial and returned state vector\n\t const Vector& direction, // Un-normalized search direction\n\t Vector test_x, // Test state vector (working memory)\n\t Real& step_size, // Initial and final step size\n\t Vector gradient, // Initial and possibly final gradient\n\t int& state_up_to_date, // 1 if gradient up-to-date, -1 otherwise\n\t Real curvature_coeff, // Factor by which gradient should reduce (0-1)\n\t Real bound_step_size) // Maximum step until bound is reached (-1 for no bound)\n  {\n    Real dir_scaling = 1.0 / norm2(direction);\n\n    // Numerical suffixes to variables indicate different locations\n    // along the line:\n    // 0 = initial point of line search, constant within this function\n    // 1 = point at which gradient has been calculated (initially the same as 0)\n    // 2 = test point\n    // 3 = test point\n\n    // Step sizes\n    const Real ss0 = 0.0;\n    Real ss1 = ss0;\n    Real ss2 = step_size;\n    Real ss3;\n\n    // Gradients in search direction\n    Real grad0 = dot_product(direction, gradient) * dir_scaling;\n    Real grad1 = grad0;\n    Real grad2, grad3;\n\n    // Cost function values\n    Real cf0 = cost_function_;\n    Real cf1 = cf0;\n    Real cf2, cf3;\n\n    int iterations_remaining = max_line_search_iterations_;\n\n    bool is_bound_step = (bound_step_size > 0.0);\n    bool at_bound = false;\n\n    if (grad0 >= 0.0) {\n      return MINIMIZER_STATUS_DIRECTION_UPHILL;\n    }\n\n    // Check initial step size is within bounds\n    if (max_step_size_ > 0.0 && ss2 > max_step_size_) {\n      ss2 = max_step_size_;\n    }\n    if (is_bound_step && ss2 >= bound_step_size) {\n      ss2 = bound_step_size;\n      
at_bound = true;\n    }\n\n    // First step: bound the minimum\n    while (iterations_remaining > 0) {\n\n      MinimizerStatus status\n\t= line_search_gradient_check(optimizable, x, direction, test_x,\n\t\t\t\t     step_size, gradient, state_up_to_date,\n\t\t\t\t     ss2, grad0, dir_scaling,\n\t\t\t\t     cf2, grad2, curvature_coeff);\n      if (status == MINIMIZER_STATUS_SUCCESS) {\n\tif (at_bound) {\n\t  status = MINIMIZER_STATUS_BOUND_REACHED;\n\t}\n\treturn status;\n      }\n      else if (status != MINIMIZER_STATUS_NOT_YET_CONVERGED) {\n\t// Cost function or its gradient not finite: revert to\n\t// previous step\n\tstep_size = ss1;\n\tif (ss1 > 0.0) {\n\t  x += (ss1 * dir_scaling) * direction;\n\t}\n\tstate_up_to_date = 0;\n\treturn status;\n      }\n     \n      if (grad2 > 0.0 || cf2 >= cf1) {\n\t// Positive gradient or cost function increase -> bounded\n\t// between points 1 and 2\n\tbreak;\n      }\n      else if (at_bound) {\n\t// The cost function has been reduced but we are already at\n\t// the maximum step size and the gradient points towards it:\n\t// make this point the solution\n\tx += (ss2 * dir_scaling) * direction;\n\tstep_size = ss2;\n\tcost_function_ = cf2;\n\tstate_up_to_date = 1;\n\treturn MINIMIZER_STATUS_BOUND_REACHED;\n      }\n      else {\n\t// Reduced cost function but not yet bounded -> look further\n\t// ahead\n\tReal new_step;\n\tif (cf1 > cf2+grad2*(ss1-ss2)) {\n\t  // Positive curvature: fit a quadratic\n\t  Real curvature = 2.0*(cf1-cf2-grad2*(ss1-ss2))/((ss1-ss2)*(ss1-ss2));\n\t  new_step = ss2-grad2/curvature; // Newton's method\n\t  // Bounds on actual step size\n\t  new_step = std::max(ss1+1.1*(ss2-ss1), std::min(new_step, ss1+10.0*(ss2-ss1)));\n\t  if (max_step_size_ > 0.0 && new_step-ss2 > max_step_size_) {\n\t    new_step = ss2 + max_step_size_;\n\t  }\n\t}\n\telse {\n\t  // Cliff gets steeper... 
simply jump ahead a lot more\n\t  new_step = ss2 + 5.0*(ss2-ss1);\n\t  if (max_step_size_ > 0.0 && new_step-ss2 > max_step_size_) {\n\t    new_step = ss2 + max_step_size_;\n\t  }\n\t}\n\tss1 = ss2;\n\tcf1 = cf2;\n\tgrad1 = grad2;\n\tss2 = new_step;\n\n\tif (is_bound_step && ss2 >= bound_step_size) {\n\t  ss2 = bound_step_size;\n\t  at_bound = true;\n\t}\n      }\n\n    }\n\n    // Second step: reduce the bounds until we get sufficiently close\n    // to the minimum\n    while (iterations_remaining > 0) {\n\n      if (ss2 <= ss1) {\n\t// Two points are identical!\n\tif (cf1 < cf0) {\n\t  // Return value at point 1\n\t  x += (ss1 * dir_scaling) * direction;\n\t  step_size = ss1;\n\t  cost_function_ = cf1;\n\t  return MINIMIZER_STATUS_SUCCESS;\n\t}\n\telse {\n\t  // Cost function did not decrease at all\n\t  return MINIMIZER_STATUS_FAILED_TO_CONVERGE;\n\t}\n      }\n\n      // Minimizer of cubic function\n      Real step_diff = ss2-ss1;\n      Real theta = (cf1-cf2) * 3.0 / step_diff + grad1 + grad2;\n      Real max_grad = std::max(std::fabs(theta),\n\t\t\t       std::max(std::fabs(grad1), std::fabs(grad2)));\n      Real scaled_theta = theta / max_grad;\n      Real gamma = max_grad * std::sqrt(scaled_theta*scaled_theta\n\t\t\t\t\t- (grad1/max_grad) * (grad2/max_grad));\n      ss3 = ss1 + ((gamma - grad1 + theta) / (2.0*gamma + grad2 - grad1)) * step_diff;\n\n\n      // Bound the step size to be at least 5% away from each end\n      ss3 = std::max(0.95*ss1+0.05*ss2,\n\t\t     std::min(0.05*ss1+0.95*ss2, ss3));\n\n      MinimizerStatus status\n\t= line_search_gradient_check(optimizable, x, direction, test_x,\n\t\t\t\t     step_size, gradient, state_up_to_date,\n\t\t\t\t     ss3, grad0, dir_scaling,\n\t\t\t\t     cf3, grad3, curvature_coeff);\n      if (status == MINIMIZER_STATUS_SUCCESS) {\n\treturn status;\n      }\n      else if (status != MINIMIZER_STATUS_NOT_YET_CONVERGED) {\n\t// Cost function or its gradient not finite: revert to\n\t// previous step\n\tstep_size = 
ss1;\n\tif (ss1 > 0.0) {\n\t  x += (ss1 * dir_scaling) * direction;\n\t}\n\tstate_up_to_date = 0;\n\treturn status;\n      }\n     \n      if (grad3 > 0.0) {\n\t// Positive gradient -> bounded between points 1 and 3\n\tss2 = ss3;\n\tcf2 = cf3;\n\tgrad2 = grad3;\n      }\n      else if (cf3 < cf1) {\n\t// Reduced cost function, negative gradient\n\tss1 = ss3;\n\tcf1 = cf3;\n\tgrad1 = grad3;\n      }\n      else {\n\t// Increased cost function, negative gradient\n\tss2 = ss3;\n\tcf2 = cf3;\n\tgrad2 = grad3;\n      }\t\n\n      --iterations_remaining;\n    }\n\n    // Maximum iterations reached: check if cost function has been\n    // reduced at all\n    state_up_to_date = -1;\n    if (cf2 < cf1) {\n      // Return value at point 2\n      x += (ss2 * dir_scaling) * direction;\n      step_size = ss2;\n      cost_function_ = cf2;  \n    }\n    else if (cf1 < cf0) {\n      // Return value at point 1\n      x += (ss1 * dir_scaling) * direction;\n      step_size = ss1;\n      cost_function_ = cf1;  \n    }\n    else {\n      // Cost function did not decrease at all\n      return MINIMIZER_STATUS_FAILED_TO_CONVERGE;\n    }\n\n    // Cost function decreased\n    return MINIMIZER_STATUS_SUCCESS;\n\n  }\n\n}\n"
  },
  {
    "path": "adept/minimize_conjugate_gradient.cpp",
    "content": "/* minimize_conjugate_gradient.cpp -- Minimize function using Conjugate Gradient algorithm\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <limits>\n#include <cmath>\n#include <adept/Minimizer.h>\n\nnamespace adept {\n\n  // Minimize the cost function embodied in \"optimizable\" using the\n  // Conjugate-Gradient algorithm, where \"x\" is the initial state\n  // vector and also where the solution is stored. By default the\n  // Polak-Ribiere method is used to compute the new search direction,\n  // but Fletcher-Reeves is also available.\n  MinimizerStatus\n  Minimizer::minimize_conjugate_gradient(Optimizable& optimizable, Vector x,\n\t\t\t\t\t bool use_fletcher_reeves)\n  {\n    int nx = x.size();\n\n    // Initial values\n    n_iterations_ = 0;\n    n_samples_ = 0;\n    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    cost_function_ = std::numeric_limits<Real>::infinity();\n\n    // The Conjugate-Gradient method is the most efficient\n    // gradient-based method in terms of memory usage, requiring a\n    // working memory of just 4*nx, making it suitable for large state\n    // vectors.\n    Vector gradient(nx);\n    Vector previous_gradient(nx);\n    Vector direction(nx);\n    Vector test_x(nx); // Used by the line search only\n\n    // Does the last calculation of the cost function in \"optimizable\"\n    // match the current contents of the state vector x? 
-1=no, 0=yes,\n    // 1=yes and the last calculation included the gradient, 2=yes and\n    // the last calculation included gradient and Hessian.\n    int state_up_to_date = -1;\n\n    // Initial step size\n    Real step_size = 1.0;\n    if (max_step_size_ > 0.0) {\n      step_size = max_step_size_;\n    }\n\n    // A restart is performed every nx+1 iterations\n    bool do_restart = true;\n    int iteration_at_last_restart = n_iterations_;\n\n    // Main loop\n    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) {\n\n      // If the last line search found a minimum along the lines\n      // satisfying the Wolfe conditions, then the current cost\n      // function and gradient will be consistent with the current\n      // state vector.  Otherwise we need to compute them.\n      if (state_up_to_date < 1) {\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n\tstate_up_to_date = 1;\n\t++n_samples_;\n      }\n\n      if (n_iterations_ == 0) {\n\tstart_cost_function_ = cost_function_;\n      }\n\n      // Check cost function and gradient are finite\n      if (!std::isfinite(cost_function_)) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\tbreak;\n      }\n      else if (any(!isfinite(gradient))) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\tbreak;\n      }\n\n      // Compute L2 norm of gradient to see how \"flat\" the environment\n      // is\n      gradient_norm_ = norm2(gradient);\n\n      // Report progress using user-defined function\n      optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_);\n\n      // Convergence has been achieved if the L2 norm has been reduced\n      // to a user-specified threshold\n      if (gradient_norm_ <= converged_gradient_norm_) {\n\tstatus_ = MINIMIZER_STATUS_SUCCESS;\n\tbreak;\n      }\n\n      // Restart every nx+1 iterations\n      if (n_iterations_ - iteration_at_last_restart > nx) {\n\tdo_restart = true;\n      }\n\n      // Find search direction\n      if 
(do_restart) {\n\t// Simple gradient descent after a restart\n\tdirection = -gradient;\n\tdo_restart = false;\n\titeration_at_last_restart = n_iterations_;\n      }\n      else {\n\t// The brains of the Conjugate-Gradient method - note that\n\t// generally the Polak-Ribiere method is believed to be\n\t// superior to Fletcher-Reeves\n\tReal beta;\n\tif (use_fletcher_reeves) {\n\t  // Fletcher-Reeves method\n\t  beta = dot_product(gradient, gradient) \n\t    / dot_product(previous_gradient, previous_gradient);\n\t}\n\telse {\n\t  // Default: Polak-Ribiere method\n\t  beta = std::max(sum(gradient * (gradient - previous_gradient))\n\t\t\t  / dot_product(previous_gradient, previous_gradient),\n\t\t\t  0.0);\n\t}\n\t// beta==0 is equivalent to gradient descent (i.e. a restart)\n\tif (beta <= 0) {\n\t  iteration_at_last_restart = n_iterations_;\n\t}\n\t// Compute new direction\n\tdirection = beta*direction - gradient;\n      }\n\n      // Store gradient for computing beta in next iteration\n      previous_gradient = gradient;\n\n      // Perform line search, storing new state vector in x\n      MinimizerStatus ls_status\n\t= line_search(optimizable, x, direction,\n\t\t      test_x, step_size, gradient, state_up_to_date,\n\t\t      cg_curvature_coeff_);\n\n      if (ls_status == MINIMIZER_STATUS_SUCCESS) {\n\t// Successfully minimized along search direction: continue to\n\t// next iteration\n\tstatus_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n      }\n      else if (iteration_at_last_restart != n_iterations_) {\n\t// Line search either made no progress or encountered a\n\t// non-finite cost function or gradient, and this was not a\n\t// restart; try restarting once\n\tdo_restart = true;\n\tstatus_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n      }\n      else {\n\t// Unrecoverable failure in line-search: return status to\n\t// calling function\n\tstatus_ = ls_status;\n      }\n\n      // Better convergence if first step size on next line search is\n      // larger than the actual 
step size on the last line search\n      step_size *= 2.0;\n\n      ++n_iterations_;\n      if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED\n\t  && n_iterations_ >= max_iterations_) {\n\tstatus_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;\n      }\n\n      // End of main loop: if status_ is anything other than\n      // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations\n      // are performed\n    }\n     \n    if (state_up_to_date < ensure_updated_state_) {\n      // The last call to calc_cost_function* was not with the state\n      // vector returned to the user, and they want it to be.\n      if (ensure_updated_state_ > 0) {\n\t// User wants at least the first derivative\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n      }\n      else {\n\t// User does not need derivatives to have been computed\n\tcost_function_ = optimizable.calc_cost_function(x);\n      }\n    }\n\n    return status_;\n  }\n\n  // Minimize the cost function embodied in \"optimizable\" using the\n  // Conjugate-Gradient algorithm, where \"x\" is the initial state\n  // vector and also where the solution is stored, subject to the\n  // constraint that x lies between min_x and max_x. 
By default the\n  // Polak-Ribiere method is used to compute the new search direction,\n  // but Fletcher-Reeves is also available.\n  MinimizerStatus\n  Minimizer::minimize_conjugate_gradient_bounded(Optimizable& optimizable, Vector x,\n\t\t\t\t\t const Vector& min_x,\n\t\t\t\t\t const Vector& max_x,\n\t\t\t\t\t bool use_fletcher_reeves)\n  {\n    if (any(min_x >= max_x)\n\t|| min_x.size() != x.size()\n\t|| max_x.size() != x.size()) {\n      return MINIMIZER_STATUS_INVALID_BOUNDS;\n    }\n\n    int nx = x.size();\n\n    // Initial values\n    n_iterations_ = 0;\n    n_samples_ = 0;\n    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    cost_function_ = std::numeric_limits<Real>::infinity();\n\n    // The Conjugate-Gradient method is the most efficient\n    // gradient-based method in terms of memory usage, requiring a\n    // working memory of just 4*nx, making it suitable for large state\n    // vectors.\n    Vector gradient(nx);\n    Vector previous_gradient(nx);\n    Vector direction(nx);\n    Vector test_x(nx); // Used by the line search only\n\n    // Which state variables are at the minimum bound (-1), maximum\n    // bound (1) or free (0)?\n    intVector bound_status(nx);\n    bound_status = 0;\n\n    // Ensure that initial x lies within the specified bounds\n    bound_status.where(x >= max_x) =  1;\n    bound_status.where(x <= min_x) = -1;\n    x = max(min_x, min(x, max_x));\n\n    int nbound = count(bound_status != 0);\n    int nfree  = nx - nbound;\n\n    // Floating-point number containing 1.0 if unbound and 0.0 if\n    // bound\n    Vector unbound_status(nx);\n    unbound_status = 1.0-fabs(bound_status);\n\n    // Does the last calculation of the cost function in \"optimizable\"\n    // match the current contents of the state vector x? 
-1=no, 0=yes,\n    // 1=yes and the last calculation included the gradient, 2=yes and\n    // the last calculation included gradient and Hessian.\n    int state_up_to_date = -1;\n\n    // Initial step size\n    Real step_size = 1.0;\n    if (max_step_size_ > 0.0) {\n      step_size = max_step_size_;\n    }\n\n    // A restart is performed every nx+1 iterations\n    bool do_restart = true;\n    int iteration_at_last_restart = n_iterations_;\n\n    // Main loop\n    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) {\n\n      // If the last line search found a minimum along the lines\n      // satisfying the Wolfe conditions, then the current cost\n      // function and gradient will be consistent with the current\n      // state vector.  Otherwise we need to compute them.\n      if (state_up_to_date < 1) {\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n\tstate_up_to_date = 1;\n\t++n_samples_;\n\n\tif (n_iterations_ == 0) {\n\t  start_cost_function_ = cost_function_;\n\t}\n\n\t// Check cost function and gradient are finite\n\tif (!std::isfinite(cost_function_)) {\n\t  status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\t  break;\n\t}\n\telse if (any(!isfinite(gradient))) {\n\t  status_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\t  break;\n\t}\n\n      }\n\n      // Check whether the bound status of each state variable is\n      // consistent with the gradient if a steepest descent were to be\n      // taken, and if not flag a restart\n      if (any(bound_status == -1 && gradient < 0.0)\n\t  || any(bound_status == 1 && gradient > 0.0)) {\n\tbound_status.where(bound_status == -1 && gradient < 0.0) = 0;\n\tbound_status.where(bound_status ==  1 && gradient > 0.0) = 0;\n\tunbound_status = 1.0-fabs(bound_status);\n\tdo_restart = true;\n      }\n      nbound = count(bound_status != 0);\n      nfree = nx - nbound;\n\n      // Set gradient at bound points to zero\n      gradient.where(bound_status != 0) = 0.0;\n\n      // Compute L2 norm of 
gradient to see how \"flat\" the environment\n      // is\n      if (nfree > 0) {\n\tgradient_norm_ = norm2(gradient);\n      }\n      else {\n\t// If no dimensions are in play we are at a corner of the\n\t// bounds and the gradient is pointing into the corner: we\n\t// have reached a minimum in the cost function subject to the\n\t// bounds so have converged\n\tgradient_norm_ = 0.0;\n      }\n\n      // Report progress using user-defined function\n      optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_);\n\n      // Convergence has been achieved if the L2 norm has been reduced\n      // to a user-specified threshold\n      if (gradient_norm_ <= converged_gradient_norm_) {\n\tstatus_ = MINIMIZER_STATUS_SUCCESS;\n\tbreak;\n      }\n\n      // Restart every nx+1 iterations\n      if (n_iterations_ - iteration_at_last_restart > nx) {\n\tdo_restart = true;\n      }\n\n      // Find search direction\n      if (do_restart) {\n\t// Simple gradient descent after a restart\n\tdirection = -gradient;\n\tdo_restart = false;\n\titeration_at_last_restart = n_iterations_;\n      }\n      else {\n\t// The brains of the Conjugate-Gradient method - note that\n\t// generally the Polak-Ribiere method is believed to be\n\t// superior to Fletcher-Reeves\n\tReal beta;\n\tif (use_fletcher_reeves) {\n\t  // Fletcher-Reeves method\n\t  beta = dot_product(gradient, gradient) \n\t    / dot_product(previous_gradient, previous_gradient);\n\t}\n\telse {\n\t  // Default: Polak-Ribiere method\n\t  beta = std::max(sum(gradient * (gradient - previous_gradient))\n\t\t\t  / dot_product(previous_gradient, previous_gradient),\n\t\t\t  0.0);\n\t}\n\t// beta==0 is equivalent to gradient descent (i.e. 
a restart)\n\tif (beta <= 0) {\n\t  iteration_at_last_restart = n_iterations_;\n\t}\n\t// Compute new direction\n\tdirection = beta*direction - gradient;\n      }\n\n      // Store gradient for computing beta in next iteration\n      previous_gradient = gradient;\n\n      // Distance to the nearest bound\n      Real dir_scaling = norm2(direction);\n      Real bound_step_size = std::numeric_limits<Real>::max();\n      int i_nearest_bound = -1;\n      int i_bound_type = 0;\n      // Work out the maximum step size along \"direction\" before a\n      // bound is met... there must be a faster way to do this\n      for (int ix = 0; ix < nx; ++ix) {\n\tif (direction(ix) > 0.0 && max_x(ix) < std::numeric_limits<Real>::max()) {\n\t  Real local_bound_step_size = dir_scaling*(max_x(ix)-x(ix))/direction(ix);\n\t  if (bound_step_size >= local_bound_step_size) {\n\t    bound_step_size = local_bound_step_size;\n\t    i_nearest_bound = ix;\n\t    i_bound_type = 1;\n\t  }\t\t\t\t   \n\t}\n\telse if (direction(ix) < 0.0 && min_x(ix) > -std::numeric_limits<Real>::max()) {\n\t  Real local_bound_step_size = dir_scaling*(min_x(ix)-x(ix))/direction(ix);\n\t  if (bound_step_size >= local_bound_step_size) {\n\t    bound_step_size = local_bound_step_size;\n\t    i_nearest_bound = ix;\n\t    i_bound_type = -1;\n\t  }\n\t}\n      }\n\n      MinimizerStatus ls_status; // line-search outcome\n      if (i_nearest_bound >= 0) {\n\t// Perform line search, storing new state vector in x\n\tls_status = line_search(optimizable, x, direction,\n\t\t\t       test_x, step_size, gradient, state_up_to_date,\n\t\t\t       cg_curvature_coeff_, bound_step_size);\n\tif (ls_status == MINIMIZER_STATUS_BOUND_REACHED) {\n\t  bound_status(i_nearest_bound) = i_bound_type;\n\t  do_restart = true;\n\t  ls_status = MINIMIZER_STATUS_SUCCESS;\n\t}\n      }\n      else {\n\t// Perform line search, storing new state vector in x\n\tls_status = line_search(optimizable, x, direction,\n\t\t\t\ttest_x, step_size, gradient, 
state_up_to_date,\n\t\t\t\tcg_curvature_coeff_);\n      }\n\n      if (ls_status == MINIMIZER_STATUS_SUCCESS) {\n\t// Successfully minimized along search direction: continue to\n\t// next iteration\n\tstatus_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n      }\n      else if (iteration_at_last_restart != n_iterations_) {\n\t// Line search either made no progress or encountered a\n\t// non-finite cost function or gradient, and this was not a\n\t// restart; try restarting once\n\tdo_restart = true;\n\tstatus_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n      }\n      else {\n\t// Unrecoverable failure in line-search: return status to\n\t// calling function\n\tstatus_ = ls_status;\n      }\n\n      // Better convergence if first step size on next line search is\n      // larger than the actual step size on the last line search\n      step_size *= 2.0;\n\n      ++n_iterations_;\n      if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED\n\t  && n_iterations_ >= max_iterations_) {\n\tstatus_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;\n      }\n\n      // End of main loop: if status_ is anything other than\n      // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations\n      // are performed\n    }\n     \n    if (state_up_to_date < ensure_updated_state_) {\n      // The last call to calc_cost_function* was not with the state\n      // vector returned to the user, and they want it to be.\n      if (ensure_updated_state_ > 0) {\n\t// User wants at least the first derivative\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n      }\n      else {\n\t// User does not need derivatives to have been computed\n\tcost_function_ = optimizable.calc_cost_function(x);\n      }\n    }\n\n    return status_;\n  }\n\n};\n"
  },
  {
    "path": "adept/minimize_levenberg_marquardt.cpp",
    "content": "/* minimize_levenberg_marquardt.cpp -- Minimize function using Levenberg-Marquardt algorithm\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <limits>\n#include <cmath>\n#include <adept/Minimizer.h>\n\nnamespace adept {\n\n  // Minimize the cost function embodied in \"optimizable\" using the\n  // Levenberg-Marquardt algorithm, where \"x\" is the initial state\n  // vector and also where the solution is stored.\n  MinimizerStatus\n  Minimizer::minimize_levenberg_marquardt(Optimizable& optimizable, Vector x,\n\t\t\t\t\t  bool use_additive_damping)\n  {\n    int nx = x.size();\n\n    // Initial values\n    n_iterations_ = 0;\n    n_samples_ = 0;\n    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    cost_function_ = std::numeric_limits<Real>::infinity();\n\n    Real new_cost;\n\n    // The main memory storage for the Levenberg family of methods\n    // consists of the following three vectors...\n    Vector new_x(nx);\n    Vector gradient(nx);\n    Vector dx(nx);\n\n    // ...and the Hessian matrix, which is stored explicitly\n    SymmMatrix hessian(nx);\n    hessian = 0.0;\n\n    Real damping = levenberg_damping_start_;\n    gradient_norm_ = -1.0;\n\n    // Original Levenberg is additive to the diagonal of the Hessian\n    // so to make the performance insensitive to an arbitrary scaling\n    // of the cost function, we scale the damping factor by the mean\n    // of the diagonal of the Hessian\n    Real diag_scaling;\n\n    // Does the last calculation of the cost function in \"optimizable\"\n    // match the current contents of the state vector x? 
-1=no, 0=yes,\n    // 1=yes and the last calculation included the gradient, 2=yes and\n    // the last calculation included gradient and Hessian.\n    int state_up_to_date = -1;\n\n    do {\n      // At this point we have either just started or have just\n      // reduced the cost function\n      cost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient, hessian);\n      diag_scaling = mean(hessian.diag_vector());\n      state_up_to_date = 2;\n      ++n_samples_;\n      if (n_iterations_ == 0) {\n\tstart_cost_function_ = cost_function_;\n      }\n\n      // Check cost function and gradient are finite\n      if (!std::isfinite(cost_function_)) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\tbreak;\n      }\n      else if (any(!isfinite(gradient))) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\tbreak;\n      }\n      // Compute L2 norm of gradient to see how \"flat\" the environment\n      // is\n      gradient_norm_ = norm2(gradient);\n      // Report progress using user-defined function\n      optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_);\n      // Convergence has been achieved if the L2 norm has been reduced\n      // to a user-specified threshold\n      if (gradient_norm_ <= converged_gradient_norm_) {\n\tstatus_ = MINIMIZER_STATUS_SUCCESS;\n\tbreak;\n      }\n\n      // Try to minimize cost function \n      Real previous_diag_scaling  = 1.0; // Used in Levenberg-Marquardt version\n      Real previous_diag_modifier = 0.0; // Used in Levenberg version\n      while(true) {\n\tif (!use_additive_damping) {\n\t  // Levenberg-Marquardt formula: scale the diagonal of the\n\t  // Hessian, where the larger the value of \"damping\", the\n\t  // closer the resulting behaviour is to steepest descent\n\t  hessian.diag_vector() *= (1.0 + damping)/previous_diag_scaling;\n\t  previous_diag_scaling = 1.0 + damping;\n\t}\n\telse {\n\t  // Older Levenberg approach: add to the diagonal instead\n\t  
hessian.diag_vector() += damping*diag_scaling - previous_diag_modifier;\n\t  previous_diag_modifier = damping*diag_scaling;\n\t}\n\tdx = -adept::solve(hessian, gradient);\n\n\t// Limit the maximum step size, if required\n\tif (max_step_size_ > 0.0) {\n\t  Real max_dx = maxval(abs(dx));\n\t  if (max_dx > max_step_size_) {\n\t    dx *= (max_step_size_/max_dx);\n\t  }\n\t}\n\n\t// Compute new cost state vector and cost function, but not\n\t// gradient or Hessian for efficiency\n\tnew_x = x+dx;\n\tnew_cost = optimizable.calc_cost_function(new_x);\n\tstate_up_to_date = -1;\n\t++n_samples_;\n\n\t// If cost function is not finite it may be possible to\n\t// recover by trying smaller step sizes\n\tbool cost_invalid = !std::isfinite(new_cost);\n\n\tif (new_cost >= cost_function_ || cost_invalid) {\n\t  // We haven't managed to reduce the cost function: increase\n\t  // damping value to take smaller steps\n\t  if (damping <= 0.0) {\n\t    damping = levenberg_damping_restart_;\n\t  }\n\t  else if (damping < levenberg_damping_max_) {\n\t    damping *= levenberg_damping_multiplier_;\n\t  }\n\t  else {\n\t    // The damping value is now larger than the maximum so we\n\t    // can get no further\n\t    if (cost_invalid) {\n\t      status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\t    }\n\t    else {\n\t      status_ = MINIMIZER_STATUS_FAILED_TO_CONVERGE;\n\t    }\n\t    break;\n\t  }\n\t}\n\telse {\n\t  // Managed to reduce cost function\n\t  x = new_x;\n\t  n_iterations_++;\n\t  // Reduce damping for next iteration\n\t  if (damping > levenberg_damping_min_) {\n\t    damping /= levenberg_damping_divider_;\n\t  }\n\t  else {\n\t    damping = 0.0;\n\t  }\n\t  if (n_iterations_ >= max_iterations_) {\n\t    status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;\n\t  }\n\t  break;\n\t}\n      } // Inner loop\n    }\n    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED);\n     \n    if (state_up_to_date < ensure_updated_state_) {\n      // The last call to calc_cost_function* was 
not with the state\n      // vector returned to the user, and they want it to be.  Note\n      // that the cost function and gradient norm ought to be\n      // up-to-date already at this point.\n      if (ensure_updated_state_ > 0) {\n\t// User wants at least the first derivative, but\n\t// calc_cost_function_gradient() is not guaranteed to be\n\t// present so we call the hessain function\n\tcost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient,\n\t\t\t\t\t\t\t\t\t hessian);\n      }\n      else {\n\t// User does not need derivatives to have been computed\n\tcost_function_ = optimizable.calc_cost_function(x);\n      }\n    }\n\n    return status_;\n  }\n\n\n  // Minimize the cost function embodied in \"optimizable\" using the\n  // Levenberg-Marquardt algorithm, where \"x\" is the initial state\n  // vector and also where the solution is stored, subject to the\n  // constraint that x lies between min_x and max_x.\n  MinimizerStatus\n  Minimizer::minimize_levenberg_marquardt_bounded(Optimizable& optimizable,\n\t\t\t\t\t\t  Vector x,\n\t\t\t\t\t\t  const Vector& min_x,\n\t\t\t\t\t\t  const Vector& max_x,\n\t\t\t\t\t\t  bool use_additive_damping)\n  {\n    if (any(min_x >= max_x)\n\t|| min_x.size() != x.size()\n\t|| max_x.size() != x.size()) {\n      return MINIMIZER_STATUS_INVALID_BOUNDS;\n    }\n\n    int nx = x.size();\n\n    // Initial values\n    n_iterations_ = 0;\n    n_samples_ = 0;\n    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    cost_function_ = std::numeric_limits<Real>::infinity();\n\n    Real new_cost;\n\n    // The main memory storage for the Levenberg family of methods\n    // consists of the following three vectors...\n    Vector new_x(nx);\n    Vector gradient(nx);\n    Vector dx(nx);\n\n    // ...and the Hessian matrix, which is stored explicitly\n    SymmMatrix hessian(nx);\n    SymmMatrix modified_hessian(nx);\n    SymmMatrix sub_hessian;\n    Vector sub_gradient;\n    Vector sub_dx;\n    hessian = 0.0;\n    Real 
damping = levenberg_damping_start_;\n\n    // Which state variables are at the minimum bound (-1), maximum\n    // bound (1) or free (0)?\n    intVector bound_status(nx);\n    bound_status = 0;\n\n    // Ensure that initial x lies within the specified bounds\n    bound_status.where(x >= max_x) =  1;\n    bound_status.where(x <= min_x) = -1;\n    x = max(min_x, min(x, max_x));\n\n    int nbound = count(bound_status != 0);\n    int nfree  = nx - nbound;\n    gradient_norm_ = -1.0;\n\n    // Original Levenberg is additive to the diagonal of the Hessian\n    // so to make the performance insensitive to an arbitrary scaling\n    // of the cost function, we scale the damping factor by the mean\n    // of the diagonal of the Hessian\n    Real diag_scaling;\n\n    // Does the last calculation of the cost function in \"optimizable\"\n    // match the current contents of the state vector x? -1=no, 0=yes,\n    // 1=yes and the last calculation included the gradient, 2=yes and\n    // the last calculation included gradient and Hessian.\n    int state_up_to_date = -1;\n\n    do {\n      // At this point we have either just started or have just\n      // reduced the cost function\n      cost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient, hessian);\n      diag_scaling = mean(hessian.diag_vector());\n      state_up_to_date = 2;\n      ++n_samples_;\n      if (n_iterations_ == 0) {\n\tstart_cost_function_ = cost_function_;\n      }\n\n      // Check cost function and gradient are finite\n      if (!std::isfinite(cost_function_)) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\tbreak;\n      }\n      else if (any(!isfinite(gradient))) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\tbreak;\n      }\n\n      // Find which dimensions are in play\n      if (nbound > 0) {\n\t// We release any dimensions from being at a minimum or\n\t// maximum bound if two conditions are met: (1) the gradient\n\t// in that dimension slopes away from the bound, and 
(2) the\n\t// Levenberg-Marquardt formula to compute dx using the current\n\t// value of \"damping\" leads to a point on the valid side of the\n\t// bound\n\tmodified_hessian = hessian;\n\tif (!use_additive_damping) {\n\t  modified_hessian.diag_vector() *= (1.0 + damping);\n\t}\n\telse {\n\t  modified_hessian.diag_vector() += damping*diag_scaling;\n\t}\n\tdx = -adept::solve(modified_hessian, gradient);\n\t// Release points at the minimum bound\n\tbound_status.where(bound_status == -1\n\t\t\t   && gradient < 0.0\n\t\t\t   && dx > 0.0) = 0;\n\t// Release points at the maximum bound\n\tbound_status.where(bound_status == 1\n\t\t\t   && gradient > 0.0\n\t\t\t   && dx < 0.0) = 0;\n      }\n\n      nbound = count(bound_status != 0);\n      nfree  = nx - nbound;\n\n      // List of indices of free state variables\n      intVector ifree(nfree);\n      if (nbound > 0) {\n\tifree = find(bound_status == 0);\n      }\n      else {\n\tifree = range(0, nx-1);\n      }\n\n      // Compute L2 norm of gradient to see how \"flat\" the environment\n      // is, restricting ourselves to the dimensions currently in play\n      if (nfree > 0) {\n\tgradient_norm_ = norm2(gradient(ifree));\n      }\n      else {\n\t// If no dimensions are in play we are at a corner of the\n\t// bounds and the gradient is pointing into the corner: we\n\t// have reached a minimum in the cost function subject to the\n\t// bounds so have converged\n\tgradient_norm_ = 0.0;\n      }\n      // Report progress using user-defined function\n      optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_);\n      // Convergence has been achieved if the L2 norm has been reduced\n      // to a user-specified threshold\n      if (gradient_norm_ <= converged_gradient_norm_) {\n\tstatus_ = MINIMIZER_STATUS_SUCCESS;\n\tbreak;\n      }\n\n      sub_gradient.clear();\n      sub_hessian.clear();\n      if (nbound > 0) {\n\tsub_gradient = gradient(ifree);\n\tsub_hessian  = 
SymmMatrix(Matrix(hessian)(ifree,ifree));\n      }\n      else {\n\tsub_gradient >>= gradient;\n\tsub_hessian  >>= hessian;\n      }\n\n      // FIX reuse dx if possible below...\n\n      // Try to minimize cost function \n      Real previous_diag_scaling  = 1.0; // Used in Levenberg-Marquardt version\n      Real previous_diag_modifier = 0.0; // Used in Levenberg version\n      while(true) {\n\tsub_dx.resize(nfree);\n\tif (!use_additive_damping) {\n\t  // Levenberg-Marquardt formula: scale the diagonal of the\n\t  // Hessian, where the larger the value of \"damping\", the\n\t  // closer the resulting behaviour is to steepest descent\n\t  sub_hessian.diag_vector() *= (1.0 + damping)/previous_diag_scaling;\n\t  previous_diag_scaling = 1.0 + damping;\n\t}\n\telse {\n\t  // Older Levenberg approach: add to the diagonal instead\n\t  sub_hessian.diag_vector() += damping*diag_scaling - previous_diag_modifier;\n\t  previous_diag_modifier = damping*diag_scaling;\n\t}\n\tsub_dx = -adept::solve(sub_hessian, sub_gradient);\n\n\t// Limit the maximum step size, if required\n\tif (max_step_size_ > 0.0) {\n\t  Real max_dx = maxval(abs(sub_dx));\n\t  if (max_dx > max_step_size_) {\n\t    sub_dx *= (max_step_size_/max_dx);\n\t  }\n\t}\n\n\t// Check for collision with new bounds\n\tintVector new_min_bounds = find(x(ifree)+sub_dx <= min_x(ifree));\n\tintVector new_max_bounds = find(x(ifree)+sub_dx >= max_x(ifree));\n\tReal mmin_frac = 2.0;\n\tReal mmax_frac = 2.0;\n\tint imin = 0, imax = 0;\n\tif (!new_min_bounds.empty()) {\n\t  Vector min_frac = -(x(ifree(new_min_bounds)) - min_x(ifree(new_min_bounds)))\n\t    / sub_dx(new_min_bounds);\n\t  mmin_frac = minval(min_frac);\n\t  imin = new_min_bounds(minloc(min_frac));\n\t}\n\tif (!new_max_bounds.empty()) {\n\t  Vector max_frac = (max_x(ifree(new_max_bounds)) - x(ifree(new_max_bounds)))\n\t    / sub_dx(new_max_bounds);\n\t  mmax_frac = minval(max_frac);\n\t  imax = new_max_bounds(maxloc(max_frac));\n\t}\n\n\tReal frac = 1.0;\n\tint 
bound_type = 0;\n\tint ibound = 0;\n\tif (mmin_frac <= 1.0 || mmax_frac <= 1.0) {\n\t  if (mmin_frac < mmax_frac) {\n\t    frac = mmin_frac;\n\t    ibound = imin;\n\t    bound_type = -1;\n\t  }\n\t  else {\n\t    frac = mmax_frac;\n\t    ibound = imax;\n\t    bound_type = 1;\n\t  }\t  \n\t  sub_dx *= frac;\n\t}\n\n\t// Compute new state vector and cost function, but not\n\t// gradient or Hessian for efficiency\n\tnew_x = x;\n\tnew_x(ifree) += sub_dx;\n\tnew_cost = optimizable.calc_cost_function(new_x);\n\tstate_up_to_date = -1;\n\t++n_samples_;\n\n\t// If cost function is not finite it may be possible to\n\t// recover by trying smaller step sizes\n\tbool cost_invalid = !std::isfinite(new_cost);\n\n\tif (new_cost >= cost_function_ || cost_invalid) {\n\t  // We haven't managed to reduce the cost function: increase\n\t  // damping value to take smaller steps\n\t  if (damping <= 0.0) {\n\t    damping = levenberg_damping_restart_;\n\t  }\n\t  else if (damping < levenberg_damping_max_) {\n\t    damping *= levenberg_damping_multiplier_;\n\t  }\n\t  else {\n\t    // The damping value is now larger than the maximum so we\n\t    // can get no further\n\t    if (cost_invalid) {\n\t      status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\t    }\n\t    else {\n\t      status_ = MINIMIZER_STATUS_FAILED_TO_CONVERGE;\n\t    }\n\t    break;\n\t  }\n\t}\n\telse {\n\t  // Managed to reduce cost function\n\t  x = new_x;\n\t  n_iterations_++;\n\t  if (frac < 1.0) {\n\t    // Found a new bound\n\t    bound_status(ifree(ibound)) = bound_type;\n\t  }\n\t  // Reduce damping for next iteration\n\t  if (damping > levenberg_damping_min_) {\n\t    damping /= levenberg_damping_divider_;\n\t  }\n\t  else {\n\t    damping = 0.0;\n\t  }\n\t  if (n_iterations_ >= max_iterations_) {\n\t    status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;\n\t  }\n\t  break;\n\t}\n      } // Inner loop\n    }\n    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED);\n    \n    if (state_up_to_date < 
ensure_updated_state_) {\n      // The last call to calc_cost_function* was not with the state\n      // vector returned to the user, and they want it to be.  Note\n      // that the cost function and gradient norm ought to be\n      // up-to-date already at this point.\n      if (ensure_updated_state_ > 0) {\n\t// User wants at least the first derivative, but\n\t// calc_cost_function_gradient() is not guaranteed to be\n\t// present so we call the hessain function\n\tcost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient,\n\t\t\t\t\t\t\t\t\t hessian);\n      }\n      else {\n\t// User does not need derivatives to have been computed\n\tcost_function_ = optimizable.calc_cost_function(x);\n      }\n    }\n\n    return status_;\n  }\n\n};\n"
  },
  {
    "path": "adept/minimize_limited_memory_bfgs.cpp",
    "content": "/* minimize_limited_memory_bfgs.cpp -- Minimize function using Limited-Memory BFGS algorithm\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <limits>\n\n#include <adept/Minimizer.h>\n\nnamespace adept {\n\n  // Structure for storing data from previous iterations used by\n  // L-BFGS minimization algorithm\n  class LbfgsData {\n\n  public:\n    LbfgsData(int nx, int ni)\n      : nx_(nx), ni_(ni), iteration_(0) {\n      x_diff_.resize(ni,nx);\n      gradient_diff_.resize(ni,nx);\n      rho_.resize(ni);\n      alpha_.resize(ni);\n      gamma_.resize(ni);\n    }\n\n    // Return false if the dot product of x_diff and gradient_diff is\n    // zero, true otherwise\n    void store(int iter, const Vector& x_diff, const Vector& gradient_diff) {\n      int index = (iter-1) % ni_;\n      x_diff_[index] = x_diff;\n      gradient_diff_[index] = gradient_diff;\n      Real dp = dot_product(x_diff, gradient_diff);\n      if (std::fabs(dp) > 10.0*std::numeric_limits<Real>::min()) {\n\trho_[index] = 1.0 / dp;\n      }\n      else if (dp >= 0.0) {\n\trho_[index] = 1.0 / std::max(dp, 10.0*std::numeric_limits<Real>::min());\n      }\n      else {\n\trho_[index] = 1.0 / std::min(dp, -10.0*std::numeric_limits<Real>::min());\n      }\n    }\n\n    // Return read-only vectors containing the differences between\n    // state vectors and gradients at sequential iterations, by\n    // slicing off the appropriate row of the matrix\n    Vector x_diff(int iter) {\n      return x_diff_[iter % ni_];\n    };\n    Vector gradient_diff(int iter) {\n      return gradient_diff_[iter % ni_];\n    };\n\n    Real& alpha(int iter) { return alpha_[iter % ni_]; }\n    Real rho(int iter) const { return rho_[iter % ni_]; }\n    Real gamma(int iter) const { return gamma_[iter % ni_]; }\n\n  private:\n    // Data\n    int nx_; // Number of state 
variables\n    int ni_; // Number of iterations to store\n    int iteration_; // Current iteration\n    Matrix x_diff_;\n    Matrix gradient_diff_;\n    Vector rho_;\n    Vector alpha_;\n    Vector gamma_;\n  };\n\n\n  // Minimize the cost function embodied in \"optimizable\" using the\n  // Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)\n  // algorithm, where \"x\" is the initial state vector and also where\n  // the solution is stored.\n  MinimizerStatus\n  Minimizer::minimize_limited_memory_bfgs(Optimizable& optimizable, Vector x)\n  {\n\n    int nx = x.size();\n\n    // Initial values\n    n_iterations_ = 0;\n    n_samples_ = 0;\n    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    cost_function_ = std::numeric_limits<Real>::infinity();\n\n    Vector previous_x(nx);\n    Vector gradient(nx);\n    Vector previous_gradient(nx);\n    Vector direction(nx);\n    Vector test_x(nx); // Used by the line search only\n\n    // Previous states needed by the L-BFGS algorithm\n    int n_states = std::min(nx, lbfgs_n_states_);\n    LbfgsData data(nx, n_states);\n\n    // Does the last calculation of the cost function in \"optimizable\"\n    // match the current contents of the state vector x? -1=no, 0=yes,\n    // 1=yes and the last calculation included the gradient, 2=yes and\n    // the last calculation included gradient and Hessian.\n    int state_up_to_date = -1;\n\n    // Initial step size\n    Real step_size = 1.0;\n    if (max_step_size_ > 0.0) {\n      step_size = max_step_size_;\n    }\n\n    // Main loop\n    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) {\n\n      // If the last line search found a minimum along the lines\n      // satisfying the Wolfe conditions, then the current cost\n      // function and gradient will be consistent with the current\n      // state vector.  
Otherwise we need to compute them.\n      if (state_up_to_date < 1) {\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n\tstate_up_to_date = 1;\n\t++n_samples_;\n\n\tif (n_iterations_ == 0) {\n\t  start_cost_function_ = cost_function_;\n\t}\n\n\t// Check cost function and gradient are finite\n\tif (!std::isfinite(cost_function_)) {\n\t  status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\t  break;\n\t}\n\telse if (any(!isfinite(gradient))) {\n\t  status_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\t  break;\n\t}\n      }\n\n      // Check cost function and gradient are finite\n      if (!std::isfinite(cost_function_)) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\tbreak;\n      }\n      else if (any(!isfinite(gradient))) {\n\tstatus_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\tbreak;\n      }\n\n      // Compute L2 norm of gradient to see how \"flat\" the environment\n      // is\n      gradient_norm_ = norm2(gradient);\n\n      // Report progress using user-defined function\n      optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_);\n\n      // Convergence has been achieved if the L2 norm has been reduced\n      // to a user-specified threshold\n      if (gradient_norm_ <= converged_gradient_norm_) {\n\tstatus_ = MINIMIZER_STATUS_SUCCESS;\n\tbreak;\n      }\n\n      // Store state and gradient differences\n      if (n_iterations_ > 0) {\n\tdata.store(n_iterations_, x-previous_x, gradient-previous_gradient);\n      }\n\n      // Find search direction: see page 779 of Nocedal (1980):\n      // Updating quasi-Newton matrices with limited\n      // storage. 
Mathematics of Computation, 35, 773-782.\n      direction = gradient;\n      if (n_iterations_ > 0) {\n\n\tfor (int ii = n_iterations_-1;\n\t     ii >= std::max(0,n_iterations_-n_states);\n\t     --ii) {\n\t  data.alpha(ii) = data.rho(ii) \n\t    * dot_product(data.x_diff(ii), direction);\n\t  direction -= data.alpha(ii) * data.gradient_diff(ii);\n\t}\n\n\tReal gamma = dot_product(x-previous_x, gradient-previous_gradient)\n\t  / std::max(10.0*std::numeric_limits<Real>::min(),\n\t\t     dot_product(gradient-previous_gradient, gradient-previous_gradient));\n\tdirection *= gamma;\n\n\tfor (int ii = std::max(0,n_iterations_-n_states);\n\t     ii < n_iterations_;\n\t     ++ii) {\n\t  Real beta = data.rho(ii) * dot_product(data.gradient_diff(ii), direction);\n\t  direction += data.x_diff(ii) * (data.alpha(ii)-beta);\n\t}\n\n\tdirection = -direction;\n      }\n      else {\n\tdirection = -gradient * (step_size / norm2(gradient));\n      }\n\n      // Store state and gradient\n      previous_x = x;\n      previous_gradient = gradient;\n\n      // Perform line search, storing new state vector in x, and\n      // returning MINIMIZER_STATUS_NOT_YET_CONVERGED on success\n      Real curvature_coeff = lbfgs_curvature_coeff_;\n      if (n_iterations_ < n_states) {\n\t// In the early iterations we require the line search to be\n\t// more accurate since the L-BFGS update will have fewer\n\t// states to make a good estimate of the minimum; interpolate\n\t// between the Conjugate Gradient and L-BFGS curvature\n\t// coefficients\n\tcurvature_coeff = (cg_curvature_coeff_ * (n_states-n_iterations_)\n\t\t\t   + lbfgs_curvature_coeff_ * n_iterations_)\n\t  / n_states;\n      }\n\n      // Direction points to the best estimate of the actual location\n      // of the minimum, so the step size is the norm of the direction\n      // vector\n      step_size = norm2(direction);\n      MinimizerStatus ls_status\n\t= line_search(optimizable, x, direction,\n\t\t      test_x, step_size, gradient, 
state_up_to_date,\n\t\t      curvature_coeff);\n\n      if (ls_status == MINIMIZER_STATUS_SUCCESS) {\n\t// Successfully minimized along search direction: continue to\n\t// next iteration\n\tstatus_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n      }\n      else {\n\t// Unrecoverable failure in line-search: return status to\n\t// calling function\n\tstatus_ = ls_status;\n      }\n\n      ++n_iterations_;\n      if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED\n\t  && n_iterations_ >= max_iterations_) {\n\tstatus_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;\n      }\n\n      // End of main loop: if status_ is anything other than\n      // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations\n      // are performed\n    }\n     \n    if (state_up_to_date < ensure_updated_state_) {\n      // The last call to calc_cost_function* was not with the state\n      // vector returned to the user, and they want it to be.\n      if (ensure_updated_state_ > 0) {\n\t// User wants at least the first derivative\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n      }\n      else {\n\t// User does not need derivatives to have been computed\n\tcost_function_ = optimizable.calc_cost_function(x);\n      }\n    }\n\n    return status_;\n  }\n\n  // Minimize the cost function embodied in \"optimizable\" using the\n  // Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)\n  // algorithm, where \"x\" is the initial state vector and also where\n  // the solution is stored.\n  MinimizerStatus\n  Minimizer::minimize_limited_memory_bfgs_bounded(Optimizable& optimizable, Vector x,\n\t\t\t\t\t\t  const Vector& min_x,\n\t\t\t\t\t\t  const Vector& max_x)\n  {\n    if (any(min_x >= max_x)\n\t|| min_x.size() != x.size()\n\t|| max_x.size() != x.size()) {\n      return MINIMIZER_STATUS_INVALID_BOUNDS;\n    }\n\n    int nx = x.size();\n\n    // Initial values\n    n_iterations_ = 0;\n    n_samples_ = 0;\n    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n    
cost_function_ = std::numeric_limits<Real>::infinity();\n\n    Vector previous_x(nx);\n    Vector gradient(nx);\n    Vector previous_gradient(nx);\n    Vector direction(nx);\n    Vector test_x(nx); // Used by the line search only\n\n    // Previous states needed by the L-BFGS algorithm\n    int n_states = std::min(nx, lbfgs_n_states_);\n    LbfgsData data(nx, n_states);\n\n    // Which state variables are at the minimum bound (-1), maximum\n    // bound (1) or free (0)?\n    intVector bound_status(nx);\n    bound_status = 0;\n\n    // Ensure that initial x lies within the specified bounds\n    bound_status.where(x >= max_x) =  1;\n    bound_status.where(x <= min_x) = -1;\n    x = max(min_x, min(x, max_x));\n\n    int nbound = count(bound_status != 0);\n    int nfree  = nx - nbound;\n\n    // Floating-point number containing 1.0 if unbound and 0.0 if\n    // bound\n    Vector unbound_status(nx);\n    unbound_status = 1.0-fabs(bound_status);\n\n    // If we reach a bound we need to restart the L-BFGS storage, so\n    // store the iteration at the last restart\n    int iteration_last_restart = 0;\n\n    // Does the last calculation of the cost function in \"optimizable\"\n    // match the current contents of the state vector x? -1=no, 0=yes,\n    // 1=yes and the last calculation included the gradient, 2=yes and\n    // the last calculation included gradient and Hessian.\n    int state_up_to_date = -1;\n\n    // Initial step size\n    Real step_size = 1.0;\n    if (max_step_size_ > 0.0) {\n      step_size = max_step_size_;\n    }\n\n    // Main loop\n    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) {\n\n      // If the last line search found a minimum along the lines\n      // satisfying the Wolfe conditions, then the current cost\n      // function and gradient will be consistent with the current\n      // state vector.  
Otherwise we need to compute them.\n      if (state_up_to_date < 1) {\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n\tstate_up_to_date = 1;\n\t++n_samples_;\n\n\tif (n_iterations_ == 0) {\n\t  start_cost_function_ = cost_function_;\n\t}\n\n\t// Check cost function and gradient are finite\n\tif (!std::isfinite(cost_function_)) {\n\t  status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;\n\t  break;\n\t}\n\telse if (any(!isfinite(gradient))) {\n\t  status_ = MINIMIZER_STATUS_INVALID_GRADIENT;\n\t  break;\n\t}\n      }\n\n      // Check whether the bound status of each state variable is\n      // consistent with the gradient if a steepest descent were to be\n      // taken, and if not flag a restart\n      if (any(bound_status == -1 && gradient < 0.0)\n\t  || any(bound_status == 1 && gradient > 0.0)) {\n\tbound_status.where(bound_status == -1 && gradient < 0.0) = 0;\n\tbound_status.where(bound_status ==  1 && gradient > 0.0) = 0;\n\tunbound_status = 1.0-fabs(bound_status);\n\titeration_last_restart = n_iterations_;\n      }\n      nbound = count(bound_status != 0);\n      nfree = nx - nbound;\n\n      // Set gradient at bound points to zero\n      gradient.where(bound_status != 0) = 0.0;\n\n      // Compute L2 norm of gradient to see how \"flat\" the environment\n      // is\n      if (nfree > 0) {\n\tgradient_norm_ = norm2(gradient);\n      }\n      else {\n\t// If no dimensions are in play we are at a corner of the\n\t// bounds and the gradient is pointing into the corner: we\n\t// have reached a minimum in the cost function subject to the\n\t// bounds so have converged\n\tgradient_norm_ = 0.0;\n      }\n\n      // Report progress using user-defined function\n      optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_);\n\n      // Convergence has been achieved if the L2 norm has been reduced\n      // to a user-specified threshold\n      if (gradient_norm_ <= converged_gradient_norm_) {\n\tstatus_ = 
MINIMIZER_STATUS_SUCCESS;\n\tbreak;\n      }\n\n      // Store state and gradient differences\n      if (n_iterations_ > iteration_last_restart) {\n\tdata.store(n_iterations_, x-previous_x, gradient-previous_gradient);\n      }\n\n      // Find search direction: see page 779 of Nocedal (1980):\n      // Updating quasi-Newton matrices with limited\n      // storage. Mathematics of Computation, 35, 773-782.\n      direction = gradient;\n      if (n_iterations_ > iteration_last_restart) {\n\n\tfor (int ii = n_iterations_-1;\n\t     ii >= std::max(iteration_last_restart,n_iterations_-n_states);\n\t     --ii) {\n\t  data.alpha(ii) = data.rho(ii) \n\t    * dot_product(data.x_diff(ii), direction);\n\t  direction -= data.alpha(ii) * data.gradient_diff(ii);\n\t}\n\n\tReal gamma = dot_product(x-previous_x, gradient-previous_gradient)\n\t  / std::max(10.0*std::numeric_limits<Real>::min(),\n\t\t     dot_product(gradient-previous_gradient, gradient-previous_gradient));\n\tdirection *= gamma;\n\n\tfor (int ii = std::max(iteration_last_restart,n_iterations_-n_states);\n\t     ii < n_iterations_;\n\t     ++ii) {\n\t  Real beta = data.rho(ii) * dot_product(data.gradient_diff(ii), direction);\n\t  direction += data.x_diff(ii) * (data.alpha(ii)-beta);\n\t}\n\n\tdirection = -direction;\n      }\n      else {\n\t// We are either at the first iteration or have restarted\n\t// having changed the bound dimensions: use steepest descent\n\tdirection = -gradient * (step_size / norm2(gradient));\n      }\n\n      // Store state and gradient\n      previous_x = x;\n      previous_gradient = gradient;\n\n      // Perform line search, storing new state vector in x, and\n      // returning MINIMIZER_STATUS_NOT_YET_CONVERGED on success\n      Real curvature_coeff = lbfgs_curvature_coeff_;\n      int n_stored_iterations = n_iterations_ - iteration_last_restart;\n      if (n_stored_iterations < n_states) {\n\t// In the early iterations we require the line search to be\n\t// more accurate since the 
L-BFGS update will have fewer\n\t// states to make a good estimate of the minimum; interpolate\n\t// between the Conjugate Gradient and L-BFGS curvature\n\t// coefficients\n\tcurvature_coeff = (cg_curvature_coeff_ * (n_states-n_stored_iterations)\n\t\t\t   + lbfgs_curvature_coeff_ * n_stored_iterations)\n\t  / n_states;\n      }\n\n      // Direction points to the best estimate of the actual location\n      // of the minimum, so the step size is the norm of the direction\n      // vector\n      step_size = norm2(direction);\n\n      // Distance to the nearest bound\n      Real dir_scaling = step_size;\n      Real bound_step_size = std::numeric_limits<Real>::max();\n      int i_nearest_bound = -1;\n      int i_bound_type = 0;\n      // Work out the maximum step size along \"direction\" before a\n      // bound is met... there must be a faster way to do this\n      for (int ix = 0; ix < nx; ++ix) {\n\tif (direction(ix) > 0.0 && max_x(ix) < std::numeric_limits<Real>::max()) {\n\t  Real local_bound_step_size = dir_scaling*(max_x(ix)-x(ix))/direction(ix);\n\t  if (bound_step_size >= local_bound_step_size) {\n\t    bound_step_size = local_bound_step_size;\n\t    i_nearest_bound = ix;\n\t    i_bound_type = 1;\n\t  }\t\t\t\t   \n\t}\n\telse if (direction(ix) < 0.0 && min_x(ix) > -std::numeric_limits<Real>::max()) {\n\t  Real local_bound_step_size = dir_scaling*(min_x(ix)-x(ix))/direction(ix);\n\t  if (bound_step_size >= local_bound_step_size) {\n\t    bound_step_size = local_bound_step_size;\n\t    i_nearest_bound = ix;\n\t    i_bound_type = -1;\n\t  }\n\t}\n      }\n\n      MinimizerStatus ls_status; // line-search outcome\n      if (i_nearest_bound >= 0) {\n\t// Perform line search, storing new state vector in x\n\tls_status = line_search(optimizable, x, direction,\n\t\t\t\ttest_x, step_size, gradient, state_up_to_date,\n\t\t\t\tcurvature_coeff, bound_step_size);\n\tif (ls_status == MINIMIZER_STATUS_BOUND_REACHED) {\n\t  bound_status(i_nearest_bound) = i_bound_type;\n\t  
// Restart the L-BFGS storage\n\t  iteration_last_restart = n_iterations_+1;\n\t  ls_status = MINIMIZER_STATUS_SUCCESS;\n\t}\n      }\n      else {\n\t// Perform line search, storing new state vector in x\n\tls_status = line_search(optimizable, x, direction,\n\t\t\t\ttest_x, step_size, gradient, state_up_to_date,\n\t\t\t\tcurvature_coeff);\n      }\n\n      if (ls_status == MINIMIZER_STATUS_SUCCESS) {\n\t// Successfully minimized along search direction: continue to\n\t// next iteration\n\tstatus_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;\n      }\n      else {\n\t// Unrecoverable failure in line-search: return status to\n\t// calling function\n\tstatus_ = ls_status;\n      }\n\n      ++n_iterations_;\n      if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED\n\t  && n_iterations_ >= max_iterations_) {\n\tstatus_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;\n      }\n\n      // End of main loop: if status_ is anything other than\n      // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations\n      // are performed\n    }\n     \n    if (state_up_to_date < ensure_updated_state_) {\n      // The last call to calc_cost_function* was not with the state\n      // vector returned to the user, and they want it to be.\n      if (ensure_updated_state_ > 0) {\n\t// User wants at least the first derivative\n\tcost_function_ = optimizable.calc_cost_function_gradient(x, gradient);\n      }\n      else {\n\t// User does not need derivatives to have been computed\n\tcost_function_ = optimizable.calc_cost_function(x);\n      }\n    }\n\n    return status_;\n  }\n\n};\n"
  },
  {
    "path": "adept/settings.cpp",
    "content": "/* settings.cpp -- View/change the overall Adept settings\n\n    Copyright (C) 2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <sstream>\n#include <cstring>\n\n#include <adept/base.h>\n#include <adept/settings.h>\n\n#ifdef HAVE_CONFIG_H\n#include \"config.h\"\n#endif\n\n#ifdef HAVE_OPENBLAS_CBLAS_HEADER\n#include <cblas.h>\n#endif\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Get compile-time settings\n  // -------------------------------------------------------------------\n\n  // Return the version of Adept at compile time\n  std::string\n  version()\n  {\n    return ADEPT_VERSION_STR;\n  }\n\n  // Return the compiler used to compile the Adept library (e.g. \"g++\n  // [4.3.2]\" or \"Microsoft Visual C++ [1800]\")\n  std::string\n  compiler_version()\n  {\n#ifdef CXX\n    std::string cv = CXX; // Defined in config.h\n#elif defined(_MSC_VER)\n    std::string cv = \"Microsoft Visual C++\";\n#else\n    std::string cv = \"unknown\";\n#endif\n\n#ifdef __GNUC__\n\n#define STRINGIFY3(A,B,C) STRINGIFY(A) \".\" STRINGIFY(B) \".\" STRINGIFY(C)\n#define STRINGIFY(A) #A\n    cv += \" [\" STRINGIFY3(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) \"]\";\n#undef STRINGIFY\n#undef STRINGIFY3\n\n#elif defined(_MSC_VER)\n\n#define STRINGIFY1(A) STRINGIFY(A)\n#define STRINGIFY(A) #A\n    cv += \" [\" STRINGIFY1(_MSC_VER) \"]\";\n#undef STRINGIFY\n#undef STRINGIFY1\n\n#endif\n    return cv;\n  }\n\n  // Return the compiler flags used when compiling the Adept library\n  // (e.g. 
\"-Wall -g -O3\")\n  std::string\n  compiler_flags()\n  {\n#ifdef CXXFLAGS\n    return CXXFLAGS; // Defined in config.h\n#else\n    return \"unknown\";\n#endif\n  }\n\n  // Return a multi-line string listing numerous aspects of the way\n  // Adept has been configured.\n  std::string\n  configuration()\n  {\n    std::stringstream s;\n    s << \"Adept version \" << adept::version() << \":\\n\";\n    s << \"  Compiled with \" << adept::compiler_version() << \"\\n\";\n    s << \"  Compiler flags \\\"\" << adept::compiler_flags() << \"\\\"\\n\";\n#ifdef BLAS_LIBS\n    if (std::strlen(BLAS_LIBS) > 2) {\n      const char* blas_libs = &BLAS_LIBS[2];\n      s << \"  BLAS support from \" << blas_libs << \" library\\n\";\n    }\n    else {\n      s << \"  BLAS support from built-in library\\n\";\n    }\n#endif\n#ifdef HAVE_OPENBLAS_CBLAS_HEADER\n    s << \"  Number of BLAS threads may be specified up to maximum of \"\n      << max_blas_threads() << \"\\n\";\n#endif\n    s << \"  Jacobians processed in blocks of size \" \n      << ADEPT_MULTIPASS_SIZE << \"\\n\";\n    return s.str();\n  }\n\n\n  // -------------------------------------------------------------------\n  // Get/set number of threads for array operations\n  // -------------------------------------------------------------------\n\n  // Get the maximum number of threads available for BLAS operations\n  int\n  max_blas_threads()\n  {\n#ifdef HAVE_OPENBLAS_CBLAS_HEADER\n    return openblas_get_num_threads();\n#else\n    return 1;\n#endif\n  }\n\n  // Set the maximum number of threads available for BLAS operations\n  // (zero means use the maximum sensible number on the current\n  // system), and return the number actually set. 
Note that OpenBLAS\n  // uses pthreads and the Jacobian calculation uses OpenMP - this can\n  // lead to inefficient behaviour so if you are computing Jacobians\n  // then you may get better performance by setting the number of\n  // array threads to one.\n  int\n  set_max_blas_threads(int n)\n  {\n#ifdef HAVE_OPENBLAS_CBLAS_HEADER\n    openblas_set_num_threads(n);\n    return openblas_get_num_threads();\n#else\n    return 1;\n#endif\n  }\n\n  // Was the library compiled with matrix multiplication support (from\n  // BLAS)?\n  bool\n  have_matrix_multiplication() {\n#ifdef HAVE_BLAS\n    return true;\n#else\n    return false;\n#endif\n  }\n\n  // Was the library compiled with linear algebra support (e.g. inv\n  // and solve from LAPACK)\n  bool\n  have_linear_algebra() {\n#ifdef HAVE_LAPACK\n    return true;\n#else\n    return false;\n#endif\n  }\n\n} // End namespace adept\n"
  },
  {
    "path": "adept/solve.cpp",
    "content": "/* solve.cpp -- Solve systems of linear equations using LAPACK\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                             \n\n#include <vector>\n\n#include <adept/solve.h>\n#include <adept/Array.h>\n#include <adept/SpecialMatrix.h>\n\n// If ADEPT_SOURCE_H is defined then we are in a header file generated\n// from all the source files, so cpplapack.h will already have been\n// included\n#ifndef AdeptSource_H\n#include \"cpplapack.h\"\n#endif\n\n#ifdef HAVE_LAPACK\n\nnamespace adept {\n\n  using namespace internal;\n  \n  // -------------------------------------------------------------------\n  // Solve Ax = b for general square matrix A\n  // -------------------------------------------------------------------\n  template <typename T>\n  Array<1,T,false> \n  solve(const Array<2,T,false>& A, const Array<1,T,false>& b) {\n    Array<2,T,false> A_;\n    Array<1,T,false> b_;\n\n    // LAPACKE is more efficient with column-major input\n    // if (A.is_row_contiguous()) {\n      A_.resize_column_major(A.dimensions());\n      A_ = A;\n    // }\n    // else {\n    //   A_.link(A);\n    // }\n\n    // if (b_.offset(0) != 0) {\n      b_ = b;\n    // }\n    // else {\n    //   b_.link(b);\n    // }\n\n    std::vector<lapack_int> ipiv(A_.dimension(0));\n\n    //    lapack_int status = LAPACKE_dgesv(LAPACK_COL_MAJOR, A_.dimension(0), 1,\n    //\t\t\t\t      A_.data(), A_.offset(1), &ipiv[0],\n    //\t\t\t\t      b_.data(), b_.dimension(0));\n    lapack_int status = cpplapack_gesv(A_.dimension(0), 1,\n\t\t\t\t       A_.data(), A_.offset(1), &ipiv[0],\n\t\t\t\t       b_.data(), b_.dimension(0));\n\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to solve general system of equations: LAPACK ?gesv returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() 
ADEPT_EXCEPTION_LOCATION));\n    }\n    return b_;    \n  }\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for general square matrix A and rectangular matrix B\n  // -------------------------------------------------------------------\n  template <typename T>\n  Array<2,T,false> \n  solve(const Array<2,T,false>& A, const Array<2,T,false>& B) {\n    Array<2,T,false> A_;\n    Array<2,T,false> B_;\n    \n    // LAPACKE is more efficient with column-major input\n    // if (A.is_row_contiguous()) {\n      A_.resize_column_major(A.dimensions());\n      A_ = A;\n    // }\n    // else {\n    //   A_.link(A);\n    // }\n\n    // if (B.is_row_contiguous()) {\n      B_.resize_column_major(B.dimensions());\n      B_ = B;\n    // }\n    // else {\n    //   B_.link(B);\n    // }\n\n    std::vector<lapack_int> ipiv(A_.dimension(0));\n\n    //    lapack_int status = LAPACKE_dgesv(LAPACK_COL_MAJOR, A_.dimension(0), B.dimension(1),\n    //\t\t\t\t      A_.data(), A_.offset(1), &ipiv[0],\n    //\t\t\t\t      B_.data(), B_.offset(1));\n    lapack_int status = cpplapack_gesv(A_.dimension(0), B.dimension(1),\n\t\t\t\t       A_.data(), A_.offset(1), &ipiv[0],\n\t\t\t\t       B_.data(), B_.offset(1));\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to solve general system of equations for matrix RHS: LAPACK ?gesv returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n    }\n    return B_;    \n  }\n\n\n  // -------------------------------------------------------------------\n  // Solve Ax = b for symmetric square matrix A\n  // -------------------------------------------------------------------\n  template <typename T, SymmMatrixOrientation Orient>\n  Array<1,T,false>\n  solve(const SpecialMatrix<T,SymmEngine<Orient>,false>& A,\n\tconst Array<1,T,false>& b) {\n    SpecialMatrix<T,SymmEngine<Orient>,false> A_;\n    Array<1,T,false> b_;\n\n    // Not sure why the original 
code copies A...\n    A_.resize(A.dimension());\n    A_ = A;\n    // A_.link(A);\n\n    // if (b.offset(0) != 1) {\n      b_ = b;\n    // }\n    // else {\n    //   b_.link(b);\n    // }\n\n    // Treat symmetric matrix as column-major\n    char uplo;\n    if (Orient == ROW_LOWER_COL_UPPER) {\n      uplo = 'U';\n    }\n    else {\n      uplo = 'L';\n    }\n\n    std::vector<lapack_int> ipiv(A_.dimension());\n\n    //    lapack_int status = LAPACKE_dsysv(LAPACK_COL_MAJOR, uplo, A_.dimension(0), 1,\n    //\t\t\t\t      A_.data(), A_.offset(), &ipiv[0],\n    //\t\t\t\t      b_.data(), b_.dimension(0));\n    lapack_int status = cpplapack_sysv(uplo, A_.dimension(0), 1,\n\t\t\t\t       A_.data(), A_.offset(), &ipiv[0],\n\t\t\t\t       b_.data(), b_.dimension(0));\n\n    if (status != 0) {\n      //      std::stringstream s;\n      //      s << \"Failed to solve symmetric system of equations: LAPACK ?sysv returned code \" << status;\n      //      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n      std::cerr << \"Warning: LAPACK solve symmetric system failed (?sysv): trying general (?gesv)\\n\";\n      return solve(Array<2,T,false>(A_),b_);\n    }\n    return b_;    \n  }\n\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for symmetric square matrix A\n  // -------------------------------------------------------------------\n  template <typename T, SymmMatrixOrientation Orient>\n  Array<2,T,false>\n  solve(const SpecialMatrix<T,SymmEngine<Orient>,false>& A,\n\tconst Array<2,T,false>& B) {\n    SpecialMatrix<T,SymmEngine<Orient>,false> A_;\n    Array<2,T,false> B_;\n\n    A_.resize(A.dimension());\n    A_ = A;\n    // A_.link(A);\n\n    // if (B.is_row_contiguous()) {\n      B_.resize_column_major(B.dimensions());\n      B_ = B;\n    // }\n    // else {\n    //   B_.link(B);\n    // }\n\n    // Treat symmetric matrix as column-major\n    char uplo;\n    if (Orient == ROW_LOWER_COL_UPPER) {\n      uplo = 
'U';\n    }\n    else {\n      uplo = 'L';\n    }\n\n    std::vector<lapack_int> ipiv(A_.dimension());\n\n    //    lapack_int status = LAPACKE_dsysv(LAPACK_COL_MAJOR, uplo, A_.dimension(0), B.dimension(1),\n    //\t\t\t\t      A_.data(), A_.offset(), &ipiv[0],\n    //\t\t\t\t      B_.data(), B_.offset(1));\n    lapack_int status = cpplapack_sysv(uplo, A_.dimension(0), B.dimension(1),\n\t\t\t\t       A_.data(), A_.offset(), &ipiv[0],\n\t\t\t\t       B_.data(), B_.offset(1));\n\n    if (status != 0) {\n      std::stringstream s;\n      s << \"Failed to solve symmetric system of equations with matrix RHS: LAPACK ?sysv returned code \" << status;\n      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));\n    }\n    return B_;\n  }\n\n}\n\n#else\n\nnamespace adept {\n  \n  using namespace internal;\n  \n  // -------------------------------------------------------------------\n  // Solve Ax = b for general square matrix A\n  // -------------------------------------------------------------------\n  template <typename T>\n  Array<1,T,false> \n  solve(const Array<2,T,false>& A, const Array<1,T,false>& b) {\n    throw feature_not_available(\"Cannot solve linear equations because compiled without LAPACK\");\n  }\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for general square matrix A and rectangular matrix B\n  // -------------------------------------------------------------------\n  template <typename T>\n  Array<2,T,false> \n  solve(const Array<2,T,false>& A, const Array<2,T,false>& B) {\n    throw feature_not_available(\"Cannot solve linear equations because compiled without LAPACK\");\n  }\n\n  // -------------------------------------------------------------------\n  // Solve Ax = b for symmetric square matrix A\n  // -------------------------------------------------------------------\n  template <typename T, SymmMatrixOrientation Orient>\n  Array<1,T,false>\n  solve(const 
SpecialMatrix<T,SymmEngine<Orient>,false>& A,\n\tconst Array<1,T,false>& b) {\n    throw feature_not_available(\"Cannot solve linear equations because compiled without LAPACK\");\n  }\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for symmetric square matrix A\n  // -------------------------------------------------------------------\n  template <typename T, SymmMatrixOrientation Orient>\n  Array<2,T,false>\n  solve(const SpecialMatrix<T,SymmEngine<Orient>,false>& A,\n\tconst Array<2,T,false>& B) {\n    throw feature_not_available(\"Cannot solve linear equations because compiled without LAPACK\");\n  }\n\n}\n\n#endif\n\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Explicit instantiations\n  // -------------------------------------------------------------------\n#define ADEPT_EXPLICIT_SOLVE(TYPE,RRANK)\t\t\t\t\\\n  template Array<RRANK,TYPE,false>\t\t\t\t\t\\\n  solve(const Array<2,TYPE,false>& A, const Array<RRANK,TYPE,false>& b); \\\n  template Array<RRANK,TYPE,false>\t\t\t\t\t\\\n  solve(const SpecialMatrix<TYPE,SymmEngine<ROW_LOWER_COL_UPPER>,false>& A, \\\n\tconst Array<RRANK,TYPE,false>& b);\t\t\t\t\t\\\n  template Array<RRANK,TYPE,false>\t\t\t\t\t\\\n  solve(const SpecialMatrix<TYPE,SymmEngine<ROW_UPPER_COL_LOWER>,false>& A, \\\n\tconst Array<RRANK,TYPE,false>& b);\n\n  ADEPT_EXPLICIT_SOLVE(float,1)\n  ADEPT_EXPLICIT_SOLVE(float,2)\n  ADEPT_EXPLICIT_SOLVE(double,1)\n  ADEPT_EXPLICIT_SOLVE(double,2)\n#undef ADEPT_EXPLICIT_SOLVE\n\n}\n\n"
  },
  {
    "path": "adept/vector_utilities.cpp",
    "content": "/* vector_utilities.cpp -- Vector utility functions\n\n    Copyright (C) 2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#include <adept/vector_utilities.h>\n\nnamespace adept {\n\n  Array<1,Real,false>\n  linspace(Real x1, Real x2, Index n) {\n    Array<1,Real,false> ans(n);\n    if (n > 1) {\n      for (Index i = 0; i < n; ++i) {\n\tans(i) = x1 + (x2-x1)*i / static_cast<Real>(n-1);\n      }\n    }\n    else if (n == 1 && x1 == x2) {\n      ans(0) = x1;\n      return ans;\n    }\n    else if (n == 1) {\n      throw(invalid_operation(\"linspace(x1,x2,n) with n=1 only valid if x1=x2\"));\n    }\n    return ans;\n  }\n\n}\n\n"
  },
  {
    "path": "benchmark/Makefile.am",
    "content": "check_PROGRAMS = autodiff_benchmark animate matrix_benchmark math_benchmark\nautodiff_benchmark_SOURCES = autodiff_benchmark.cpp \\\n\tdifferentiator.h advection_schemes.h \\\n\tadvection_schemes_AD.h advection_schemes_K.h nx.h\n\nautodiff_benchmark_CPPFLAGS = -I@top_srcdir@/include\nautodiff_benchmark_LDFLAGS = -static -no-install -L@top_srcdir@/adept/.libs\nautodiff_benchmark_LDADD = -ladept\n\nanimate_SOURCES = animate.cpp\nanimate_CPPFLAGS = -I@top_srcdir@/include\n\nmatrix_benchmark_SOURCES = matrix_benchmark.cpp\nmatrix_benchmark_CPPFLAGS = -I@top_srcdir@/include\nmatrix_benchmark_LDFLAGS = -static -no-install -L@top_srcdir@/adept/.libs\nmatrix_benchmark_LDADD = -ladept\n\nmath_benchmark_SOURCES = math_benchmark.cpp\nmath_benchmark_CPPFLAGS = -I@top_srcdir@/include\nmath_benchmark_LDFLAGS = -static -no-install -L@top_srcdir@/adept/.libs\nmath_benchmark_LDADD = -ladept\n"
  },
  {
    "path": "benchmark/advection_schemes.h",
    "content": "/* advection_schemes.h - Two test advection algorithms from the Adept paper\n\n  Copyright (C) 2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// Use templates so that these functions can be easily compiled with\n// different automatic differentiation tools in order that the\n// performance of these tools can be compared.\n\n#ifndef ADVECTION_SCHEMES_H\n#define ADVECTION_SCHEMES_H 1\n\n#include <cmath>\n\n// Use a fixed problem size\n#include \"nx.h\"\n\n// Lax-Wendroff scheme applied to linear advection\ntemplate <class aReal, typename Real>\nvoid lax_wendroff(int nt, Real c, const aReal q_init[NX], aReal q[NX]) {\n  aReal flux[NX-1];                        // Fluxes between boxes\n  for (int i=0; i<NX; i++) q[i] = q_init[i]; // Initialize q \n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    for (int i=0; i<NX-1; i++) flux[i] = 0.5*c*(q[i]+q[i+1]+c*(q[i]-q[i+1]));\n    for (int i=1; i<NX-1; i++) q[i] += flux[i-1]-flux[i];\n    q[0] = q[NX-2]; q[NX-1] = q[1];          // Treat boundary conditions\n  }\n}\n\n// Toon advection scheme applied to linear advection\ntemplate <class aReal, typename Real>\nvoid toon(int nt, Real c, const aReal q_init[NX], aReal q[NX]) {\n  aReal flux[NX-1];                        // Fluxes between boxes\n  for (int i=0; i<NX; i++) q[i] = q_init[i]; // Initialize q\n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    for (int i=0; i<NX-1; i++) {\n      // Need to check if the difference between adjacent points is\n      // not too small or we end up with close to 0/0.  
Unfortunately\n      // the \"fabs\" function is not always available in CppAD, hence\n      // the following.\n      //      aReal bigdiff = (q[i]-q[i+1])*1.0e6;\n      //      if (bigdiff > q[i] || bigdiff < -q[i]) {\n\tflux[i] = (exp(c*log(q[i]/q[i+1]))-1.0)\n\t  * q[i]*q[i+1] / (q[i]-q[i+1]);\n\t//      }\n\t//      else {\n\t//\tflux[i] = c*q[i]; // Upwind scheme\n\t//      }\n    }\n    for (int i=1; i<NX-1; i++) q[i] += flux[i-1]-flux[i];\n    q[0] = q[NX-2]; q[NX-1] = q[1];          // Treat boundary conditions\n  }\n}\n\n#include \"adept_arrays.h\"\n\n\ntemplate <typename T> struct is_active { static const bool value = false; };\ntemplate <> struct is_active<adept::aReal> { static const bool value = true; };\n\n// Lax-Wendroff scheme applied to linear advection\ntemplate <typename aReal, typename Real>\nvoid lax_wendroff_vector(int nt, Real c, const aReal q_init[NX], \n\t\t\t aReal q[NX]) {\n  using namespace adept;\n  typedef adept::Array<1,Real,::is_active<aReal>::value> my_vector;\n  //  typedef adept::Array<1,Real,true> my_vector;\n  my_vector Q(NX);\n  my_vector F(NX-1);\n  my_vector Qleft = Q(range(0,end-1));\n  my_vector Qright = Q(range(1,end));\n  my_vector Qcentre = Q(range(1,end-1));\n  my_vector Fleft = F(range(0,end-1));\n  my_vector Fright = F(range(1,end));\n  for (int i=0; i<NX; i++) Q(i) = q_init[i]; // Initialize q \n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    F = 0.5*c*(Qleft+Qright+c*(Qleft-Qright));\n    Qcentre += Fleft-Fright;\n    Q(0) = Q(NX-2);\n    Q(NX-1) = Q(1);\n  }\n  for (int i=0; i<NX; i++) q[i] = Q(i);\n}\n\ntemplate <class aReal, typename Real>\nvoid toon_vector(int nt, Real c, const aReal q_init[NX], aReal q[NX]) {\n  using namespace adept;\n  typedef adept::Array<1,Real,::is_active<aReal>::value> my_vector;\n  my_vector Q(NX);\n  my_vector F(NX-1);\n  my_vector Qleft = Q(range(0,end-1));\n  my_vector Qright = Q(range(1,end));\n  my_vector Qcentre = Q(range(1,end-1));\n  my_vector Fleft = 
F(range(0,end-1));\n  my_vector Fright = F(range(1,end));\n  for (int i=0; i<NX; i++) Q(i) = q_init[i]; // Initialize q\n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    F = (exp(c*log(Qleft/Qright))-1.0)\n      * Qleft*Qright / (Qleft-Qright);\n    Qcentre += Fleft-Fright;\n    Q(0) = Q(NX-2);\n    Q(NX-1) = Q(1);\n  }\n  for (int i=0; i<NX; i++) q[i] = Q(i);\n}\n#endif\n"
  },
  {
    "path": "benchmark/advection_schemes_AD.h",
    "content": "/* advection_schemes_AD.h - Header for the hand-coded adjoints\n\n  Copyright (C) 2014 The University of Reading\n  Copyright (C) 2018 European Centre for Medium-Range Weather Forecasts\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#ifndef ADVECTION_SCHEMES_AD_H\n#define ADVECTION_SCHEMES_AD_H\n\n#include \"nx.h\"\n\n// Hand-coded adjoint of Lax-Wendroff advection scheme\ntemplate <typename real>\nvoid lax_wendroff_AD(int nt, real c, const real q_init[NX], real q[NX],\n\t\t     const real q_AD_const[NX], real q_init_AD[NX]) {\n  // Forward pass\n  real flux[NX-1];\n\n  for (int i = 0; i < NX; i++)   q[i] = q_init[i];\n\n  // Forward pass\n  for (int j = 0; j < nt; j++) {\n    for (int i = 0; i < NX-1; i++)  flux[i] = 0.5*c*(q[i]+q[i+1]+c*(q[i]-q[i+1]));\n    for (int i = 1; i < NX-1; i++)  q[i] += flux[i-1]-flux[i];\n    q[0] = q[NX-2]; q[NX-1] = q[1];  // Treat boundary conditions\n  }\n\n  real q_AD[NX];\n  real flux_AD[NX-1];\n  for (int i = 0; i < NX; i++) q_AD[i] = q_AD_const[i];\n  for (int i = 0; i < NX-1; i++) flux_AD[i] = 0.0;\n  \n  // Reverse pass\n  for (int j = nt-1; j >= 0; j--) {\n    q_AD[NX-2] += q_AD[0];\n    q_AD[0] = 0.0;\n    q_AD[1] += q_AD[NX-1];\n    q_AD[NX-1] = 0.0;\n\n    for(int i = 1; i < NX-1; i++) {\n      flux_AD[i-1] += q_AD[i];\n      flux_AD[i] -= q_AD[i];\n      //      q_AD[i] = 0.0;\n    }\n    real factor1 = 0.5*c*(1.0+c);\n    real factor2 = 0.5*c*(1.0-c);\n    for (int i = 0; i < NX-1; i++) {\n      q_AD[i] += factor1*flux_AD[i];\n      q_AD[i+1] += factor2*flux_AD[i];\n      flux_AD[i] = 0.0;\n    }\n  }\n  for (int i = 0; i < NX; i++) {\n    q_init_AD[i] = q_AD[i];\n    q_AD[i] = 0.0;\n  }\n}\n\n// Hand-coded adjoint of Toon advection scheme\ntemplate <typename real>\nvoid toon_AD(int nt, real c, const real 
q_init[NX], real q_out[NX],\n\t     const real q_AD_const[NX], real q_init_AD[NX]) {\n  // Forward pass\n  real flux[NX-1];\n\n  real* q_save = new real[NX*(nt+1)];\n  //  real q_save[NX*(nt+1)];\n  real* q = &(q_save[0]);\n\n  for (int i = 0; i < NX; i++)   q[i] = q_init[i];\n\n  // Forward pass\n  for (int j = 0; j < nt; j++) {\n    for (int i=0; i<NX-1; i++) flux[i] = (exp(c*log(q[i]/q[i+1]))-1.0) \n                                         * q[i]*q[i+1] / (q[i]-q[i+1]);\n    q += NX;\n    for (int i = 1; i < NX-1; i++)  q[i] = q[i-NX]+flux[i-1]-flux[i];\n    q[0] = q[NX-2]; q[NX-1] = q[1];  // Treat boundary conditions\n  }\n\n  for (int i = 0; i < NX; i++) q_out[i] = q[i];\n\n  real q_AD[NX];\n  real flux_AD[NX-1];\n  for (int i = 0; i < NX; i++) q_AD[i] = q_AD_const[i];\n  for (int i = 0; i < NX-1; i++) flux_AD[i] = 0.0;\n  \n  // Reverse pass\n  for (int j = nt-1; j >= 0; j--) {\n    q_AD[NX-2] += q_AD[0];\n    q_AD[0] = 0.0;\n    q_AD[1] += q_AD[NX-1];\n    q_AD[NX-1] = 0.0;\n\n    for(int i = 1; i < NX-1; i++) {\n      flux_AD[i-1] += q_AD[i];\n      flux_AD[i] -= q_AD[i];\n      //      q_AD[i] = 0.0;\n    }\n    q -= NX;\n    for (int i = 0; i < NX-1; i++) {\n      real factor = exp(c*log(q[i]/q[i+1]));\n      real one_over_q_i = 1.0/q[i];\n      real one_over_q_i_plus_one = 1.0/q[i+1];\n\n      // Up to and including Adept 2.0.5 this was the incorrect line:\n      //      real one_over_denominator = 1.0/(one_over_q_i+one_over_q_i_plus_one);\n      // This is the corrected line:\n      real one_over_denominator = 1.0/(one_over_q_i_plus_one-one_over_q_i);\n\n      q_AD[i] += one_over_denominator*one_over_q_i\n\t* (c*factor - (factor-1.0)*one_over_denominator*one_over_q_i)\n\t* flux_AD[i];\n      q_AD[i+1] += one_over_denominator*one_over_q_i_plus_one\n\t* (- c*factor + (factor-1.0)*one_over_denominator*one_over_q_i_plus_one)\n\t* flux_AD[i];\n      flux_AD[i] = 0.0;\n    }\n  }\n  for (int i = 0; i < NX; i++) {\n    q_init_AD[i] = q_AD[i];\n    q_AD[i] = 
0.0;\n  }\n\n  delete[] q_save;\n}\n\n#endif\n"
  },
  {
    "path": "benchmark/advection_schemes_K.h",
    "content": "/* advection_schemes_K.h - Header for hand-coded Jacobians\n\n  Copyright (C) 2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#ifndef ADVECTION_SCHEMES_K_H\n#define ADVECTION_SCHEMES_K_H\n\n#include <cmath>\n#include <iostream>\n\n#include \"nx.h\"\n\n// Lax-Wendroff scheme applied to linear advection\ntemplate <typename real>\nvoid lax_wendroff_K(int nt, real c, const real q_init[NX],\n\t\t    real q[NX], real jacobian[NX*NX]) {\n  real flux[NX-1];                         // Fluxes between boxes\n  real flux_K[NX-1][NX];                   // Flux Jacobian (dflux/dq_init)\n  //  real (&q_K)[NX][NX] = *reinterpret_cast<real(*)[NX][NX]>(jacobian);\n  real q_K[NX][NX];\n  real coeff1 = 0.5*c*(1.0+c);\n  real coeff2 = 0.5*c*(1.0-c);\n\n  for (int i=0; i<NX; i++) {\n    q[i] = q_init[i];                        // Initialize q \n    for (int k=0; k<NX; k++) {\n      q_K[i][k] = 0.0;                       // Initialize Jacobian\n    }\n    q_K[i][i] = 1.0;\n  }\n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    for (int i=0; i<NX-1; i++) {\n      flux[i] = 0.5*c*(q[i]+q[i+1]+c*(q[i]-q[i+1]));\n      for (int k=0; k<NX; k++) {\n\tflux_K[i][k] = coeff1*q_K[i][k] + coeff2*q_K[i+1][k];\n      }\n    }\n    for (int i=1; i<NX-1; i++) {\n      q[i] += flux[i-1]-flux[i];\n      for (int k=0; k<NX; k++) {\n\tq_K[i][k] += flux_K[i-1][k]-flux_K[i][k];\n      }\n    }\n    q[0] = q[NX-2]; q[NX-1] = q[1];          // Treat boundary conditions\n    for (int k=0; k<NX; k++) {\n      q_K[0][k] = q_K[NX-2][k];\n      q_K[NX-1][k] = q_K[1][k];\n    }\n  }\n\n  // Transpose the result\n  for (int i = 0, index = 0; i < NX; i++) {\n    for (int j = 0; j < NX; j++, index++) {\n      jacobian[index] = q_K[j][i];\n    }\n  
}\n\n}\n\n\n// Toon advection scheme applied to linear advection\ntemplate <typename real>\nvoid toon_K(int nt, real c, const real q_init[NX], real q[NX],\n\t    real jacobian[NX*NX]) {\n  real flux[NX-1];                        // Fluxes between boxes\n  real flux_K[NX-1][NX];\n  real q_K[NX][NX];\n\n  for (int i=0; i<NX; i++) {\n    q[i] = q_init[i]; // Initialize q\n    for (int k=0; k<NX; k++) {\n      q_K[i][k] = 0.0;                       // Initialize Jacobian\n    }\n    q_K[i][i] = 1.0;\n  }\n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    for (int i=0; i<NX-1; i++) {\n      real coeff1, coeff2;\n      // Ought to check if the difference between adjacent points is\n      // not too small or we end up with close to 0/0, but this leads\n      // to different results from the automatic differentiation\n      //      if (fabs(q[i]-q[i+1]) > q[i]*1.0e-6) {\n\treal factor = exp(c*log(q[i]/q[i+1]));\n\treal one_over_denominator = 1.0/(q[i]-q[i+1]);\n\tcoeff1 = one_over_denominator*q[i+1]\n\t  * (c*factor + (factor-1.0)*(1.0-q[i]*one_over_denominator));\n\tcoeff2 = one_over_denominator*q[i]\n\t  * (- c*factor + (factor-1.0)*(1.0+q[i+1]*one_over_denominator));\n\tflux[i] = (factor-1.0) * q[i]*q[i+1]*one_over_denominator;\n\t/*\n      }\n      else {\n\tflux[i] = c*q[i]; // Upwind scheme\n\tcoeff1 = c;\n\tcoeff2 = 0.0;\n      }\n\t*/\n      for (int k=0; k<NX; k++) {\n\tflux_K[i][k] = coeff1*q_K[i][k] + coeff2*q_K[i+1][k];\n      }\n    }\n\n    for (int i=1; i<NX-1; i++) {\n      q[i] += flux[i-1]-flux[i];\n      for (int k=0; k<NX; k++) {\n\tq_K[i][k] += flux_K[i-1][k]-flux_K[i][k];\n      }\n    }\n    q[0] = q[NX-2]; q[NX-1] = q[1];          // Treat boundary conditions\n    for (int k=0; k<NX; k++) {\n      q_K[0][k] = q_K[NX-2][k];\n      q_K[NX-1][k] = q_K[1][k];\n    }\n  }\n\n  // Transpose the result\n  for (int i = 0, index = 0; i < NX; i++) {\n    for (int j = 0; j < NX; j++, index++) {\n      jacobian[index] = q_K[j][i];\n    }\n  
}\n}\n\n#endif\n"
  },
  {
    "path": "benchmark/animate.cpp",
    "content": "/* animate.cpp - Visualize the advection\n\n  Copyright (C) 2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <string>\n#include <iostream>\n#include <time.h>\n\n#include \"advection_schemes.h\"\n\nint\nmain(int argc, char** argv)\n{\n  double q1_save[NX];\n  double q2_save[NX];\n  double* q1 = q1_save;\n  double* q2 = q2_save;\n  double pi = 4.0*atan(1.0);\n\n  double min_q = -0.2;\n  double max_q = 1.2;\n  double dq = 0.05;\n\n  double dt = 0.125;\n  int nt = 8;\n  int cycles = 5;\n\n  int j_min = min_q/dq;\n  int j_max = max_q/dq;\n\n  std::string line;\n  line.resize(NX);\n\n  timespec t;\n  t.tv_sec = 0;\n  t.tv_nsec = 20000000;\n\n  for (int i = 0; i < NX; i++) q1[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+0.0001;\n  for (int k = 0; k < cycles*NX/(nt*dt); k++) {\n    std::cout << \"\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\";\n\n    for (int j = j_max; j > 0; j--) {\n      double q_thresh = j*dq;\n      for (int i = 0; i < NX; i++) {\n\tif (q1[i] > q_thresh) {\n\t  line[i] = '#';\n\t}\n\telse {\n\t  line[i] = ' ';\n\t}\n      }\n      std::cout << line << \"\\n\";\n    }\n    for (int i = 0; i < NX; i++) {\n      line[i] = '-';\n    }\n    std::cout << line << \"\\n\";\n    for (int j = -1; j > j_min; j--) {\n      double q_thresh = j*dq;\n      for (int i = 0; i < NX; i++) {\n\tif (q1[i] <= q_thresh) {\n\t  line[i] = '$';\n\t}\n\telse {\n\t  line[i] = ' ';\n\t}\n      }\n      std::cout << line << \"\\n\";\n      std::cout.flush();\n    }\n    nanosleep(&t, 0);\n    //toon(nt, dt, q1, q2);\n    lax_wendroff(nt, dt, q1, q2);\n    double* tmp = q1;\n    q2 = q1;\n    q1 = tmp;  \n  }\n  return 0;\n}\n"
  },
  {
    "path": "benchmark/autodiff_benchmark.cpp",
    "content": "/* autodiff_benchmark.cpp - Program to benchmark different automatic differentiation tools\n\n  Copyright (C) 2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <sstream>\n#include <iostream>\n#include <vector>\n#include <cmath>\n#include <valarray>\n\n#include \"differentiator.h\"\n\n#include <adept.h>\nusing adept::Real;\n\nstatic\nReal\nrms(const std::vector<Real>& a, const std::vector<Real>&b)\n{\n  if (a.size() != b.size()) {\n    throw differentiator_exception(\"Attempt to compute RMS difference between vectors of different size\");\n  }\n  Real sum = 0.0;\n  for (size_t i = 0; i < a.size(); i++) {\n    sum += (a[i]-b[i])*(a[i]-b[i]);\n  }\n  return sqrt(sum/a.size());\n}\n\nstatic\nvoid\nusage(const char* argv0)\n{\n  std::cout << \"Usage: \" << argv0 << \" [OPTIONS] where OPTIONS can be\\n\";\n  std::cout << \"  -h|--help          Print this message\\n\";\n  std::cout << \"  -a|--algorithm  s  Use test algorithms specified by string s which may be\\n\";\n  std::cout << \"                     \\\"all\\\" or a comma separated list with possible entries\\n\";\n  std::cout << \"                     \" << test_algorithms() << \"\\n\";\n  std::cout << \"  -t|--tool       s  Use automatic differentiation tools specified by string\\n\";\n  std::cout << \"                     s which may be \\\"all\\\" or a comma separated list with\\n\";\n  std::cout << \"                     possible entries \" << autodiff_tools() << \"\\n\";    \n  std::cout << \"  -r|--repeat     n  Benchmark repeats the simulation n times\\n\";\n  std::cout << \"  -j|--jrepeat    n  Repeat the Jacobian simulation n times\\n\";\n  std::cout << \"  -n|--timesteps  n  Simulation uses n timesteps\\n\";\n  std::cout << \"  --print-result     Print the 
final output from the simulation(s)\\n\";\n  std::cout << \"  --print-adjoint    Print the hand-coded adjoint\\n\";\n  std::cout << \"  --print-jacobian   Print the hand-coded Jacobian matrix\\n\";\n  std::cout << \"  --no-openmp        Don't use OpenMP to speed up Adept\\n\";\n  std::cout << \"  --jacobian-forward Force use of forward-mode Jacobian\\n\";\n  std::cout << \"  --jacobian-reverse Force use of reverse-mode Jacobian\\n\";\n  std::cout << \"  --tolerance     x  Agreement with hand-coded requires RMS difference < x\\n\";\n  std::cout << \"  --verify-only      No benchmark: only verify correctness of results\\n\";\n  std::cout << \"Return code: 0 if all automatic differentiation tools produce adjoints and\\n\"\n    \"  Jacobians whose RMS difference with the values from hand-coded\\n\"\n    \"  differentiation is less than the required tolerance; 1 otherwise.\\n\";\n}\n\nint\nmain(int argc, char** argv)\n{\n  int nt = 2000;\n  int nr = 100;\n  int nr_jacobian = nr/10;\n  Real dt = 0.125;\n  Real tolerance = 1.0e-5;\n  int force_jacobian = 0;\n\n  bool verbose = false;\n  bool print_result = false;\n  bool print_adjoint = false;\n  bool print_jacobian = false;\n  bool no_openmp = false;\n  bool verify_only = false;\n\n  std::valarray<bool> use_tool(N_AUTODIFF_TOOLS);\n  std::valarray<bool> use_algorithm(N_TEST_ALGORITHMS);\n\n  use_tool = true;\n  use_algorithm = true;\n\n  int iarg = 1;\n\n  while (iarg < argc) {\n    if (std::string(\"-h\") == argv[iarg]\n\t|| std::string(\"--help\") == argv[iarg]) {\n      usage(argv[0]);\n      return 0;\n    }\n    if (std::string(\"-v\") == argv[iarg]\n\t|| std::string(\"--verbose\") == argv[iarg]) {\n      verbose = true;\n    }\n    else if (std::string(\"--print-result\") == argv[iarg]) {\n      print_result = true;\n    }\n    else if (std::string(\"--print-adjoint\") == argv[iarg]) {\n      print_adjoint = true;\n    }\n    else if (std::string(\"--print-jacobian\") == argv[iarg]) {\n      print_jacobian = 
true;\n    }\n    else if (std::string(\"--jacobian-forward\") == argv[iarg]) {\n      force_jacobian = +1;\n    }\n    else if (std::string(\"--jacobian-reverse\") == argv[iarg]) {\n      force_jacobian = -1;\n    }\n    else if (std::string(\"--no-openmp\") == argv[iarg]) {\n      no_openmp = true;\n    }\n    else if (std::string(\"--verify-only\") == argv[iarg]) {\n      verify_only = true;\n    }\n    else if (std::string(\"-a\") == argv[iarg]\n\t     || std::string(\"--algorithm\") == argv[iarg]) {\n      if (++iarg < argc) {\n\tif (std::string(argv[iarg]) != \"all\") {\n\t  use_algorithm = false;\n\t  std::istringstream ss(argv[iarg]);\n\t  std::string alg;\n\t  while (std::getline(ss, alg, ',')) {\n\t    bool found = false;\n\t    for (int i = 0; i < N_TEST_ALGORITHMS; i++) {\n\t      if (alg == test_algorithm_string[i]) {\n\t\tuse_algorithm[i] = true;\n\t\tfound = true;\n\t\tbreak;\n\t      }\n\t    }\n\t    if (!found) {\n\t      std::cout << \"Test algorithm \\\"\"\n\t\t\t<< alg << \"\\\" not available; available algorithms are \"\n\t\t\t<< test_algorithms() << \"\\n\";\n\t    }\n\t  }\n\t}\n      }\n      else {\n\tstd::cout << \"Arguments \\\"-a\\\" or \\\"--algorithm\\\" need to be followed by a string containing a comma-separated list of algorithms\\n\";\n\treturn 1;\n      }\n    }\n    else if (std::string(\"-t\") == argv[iarg]\n\t     || std::string(\"--tool\") == argv[iarg]) {\n      if (++iarg < argc) {\n\tif (std::string(argv[iarg]) != \"all\") {\n\t  use_tool = false;\n\t  std::istringstream ss(argv[iarg]);\n\t  std::string tool;\n\t  while (std::getline(ss, tool, ',')) {\n\t    bool found = false;\n\t    for (int i = 0; i < N_AUTODIFF_TOOLS; i++) {\n\t      if (tool == autodiff_tool_string[i]) {\n\t\tuse_tool[i] = true;\n\t\tfound = true;\n\t\tbreak;\n\t      }\n\t    }\n\t    if (!found) {\n\t      std::cout << \"Automatic differentiation tool \\\"\"\n\t\t\t<< tool << \"\\\" not available; available tools are \"\n\t\t\t<< autodiff_tools() << 
\"\\n\";\n\t    }\n\t  }\n\t}\n      }\n      else {\n\tstd::cout << \"Arguments \\\"-a\\\" or \\\"--algorithm\\\" need to be followed by a string containing a comma-separated list of algorithms\\n\";\n\treturn 1;\n      }\n    }\n    else if (std::string(\"-r\") == argv[iarg]\n\t     || std::string(\"--repeat\") == argv[iarg]) {\n      if (++iarg < argc) {\n\tstd::stringstream ss(argv[iarg]);\n\tif (ss >> nr) {\n\t  if (nr <= 0) { \n\t    std::cout << \"Number of repeats must be greater than zero\\n\";\n\t    return 1;\n\t  }\n\t}\n\telse {\n\t  std::cout << \"Failed to read \\\"\"\n\t\t    << argv[iarg]\n\t\t    << \"\\\"as an integer\\n\";\n\t  return 1;\n\t}\n      }\n      else {\n\tthrow differentiator_exception(\"Arguments \\\"-r\\\" or \\\"--repeat\\\" need to be followed by a number\");\n      }\n    }\n    else if (std::string(\"-j\") == argv[iarg]\n\t     || std::string(\"--jrepeat\") == argv[iarg]) {\n      if (++iarg < argc) {\n\tstd::stringstream ss(argv[iarg]);\n\tif (ss >> nr_jacobian) {\n\t  if (nr <= 0) { \n\t    throw differentiator_exception(\"Number of repeats must be greater than zero\");\n\t  }\n\t}\n\telse {\n\t  std::string msg = \"Failed to read \\\"\";\n\t  msg += argv[iarg];\n\t  msg += \"\\\"as an integer\";\n\t  throw differentiator_exception(msg.c_str());\n\t}\n      }\n      else {\n\tthrow differentiator_exception(\"Arguments \\\"-j\\\" or \\\"--jrepeat\\\" need to be followed by a number\");\n      }\n    }\n    else if (std::string(\"-n\") == argv[iarg]\n\t     || std::string(\"--timesteps\") == argv[iarg]) {\n      if (++iarg < argc) {\n\tstd::stringstream ss(argv[iarg]);\n\tif (ss >> nt) {\n\t  if (nt < 0) { \n\t    throw differentiator_exception(\"Number of timesteps must be greater than or equal to zero\");\n\t  }\n\t}\n\telse {\n\t  std::string msg = \"Failed to read \\\"\";\n\t  msg += argv[iarg];\n\t  msg += \"\\\"as an integer\";\n\t  throw differentiator_exception(msg.c_str());\n\t}\n      }\n      else {\n\tthrow 
differentiator_exception(\"Arguments \\\"-n\\\" or \\\"--timesteps\\\" need to be followed by a number\");\n      }\n    }\n    else if (std::string(\"--tolerance\") == argv[iarg]) {\n      if (++iarg < argc) {\n\tstd::stringstream ss(argv[iarg]);\n\tif (ss >> tolerance) {\n\t  if (tolerance < 0) { \n\t    throw differentiator_exception(\"Tolerance must be greater than or equal to zero\");\n\t  }\n\t}\n\telse {\n\t  std::string msg = \"Failed to read \\\"\";\n\t  msg += argv[iarg];\n\t  msg += \"\\\"as a Real\";\n\t  throw differentiator_exception(msg.c_str());\n\t}\n      }\n      else {\n\tthrow differentiator_exception(\"Arguments \\\"-j\\\" or \\\"--jrepeat\\\" need to be followed by a number\");\n      }\n    }\n    else {\n      std::string msg = \"Argument \\\"\";\n      msg += argv[iarg];\n      msg += \"\\\" not understood\\n\";\n      std::cout << msg;\n      usage(argv[0]);\n      return 1;\n    }\n    iarg++;\n  }\n\n  Real pi = 4.0*atan(1.0);\n  std::vector<Real> q_init(NX);\n  std::vector<Real> q(NX);\n  std::vector<Real> q_AD(NX);\n  std::vector<Real> q_init_AD(NX);\n  std::vector<Real> q_init_AD_reference(NX);\n  std::vector<Real> jac(NX*NX);\n  std::vector<Real> jac_reference(NX*NX);\n\n  int nr_warm_up = nr/10;\n  int nr_jacobian_warm_up = nr_jacobian/10;\n  if (nr_warm_up < 1) {\n    nr_warm_up = 1;\n  }\n  if (nr_jacobian_warm_up < 1) {\n    nr_jacobian_warm_up = 1;\n  }\n\n  if (verify_only) {\n    nr = 0;\n    nr_jacobian = 0;\n    nr_warm_up = 1;\n    nr_jacobian_warm_up = 1;\n  }\n\n  for (int i = 0; i < NX; i++) q_init[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+1;\n  for (int i = 0; i < NX; i++) q_AD[i] = 0.1;\n\n  bool verify_error = false;\n\n  Timer timer;\n\n  std::cout << \"Automatic differentiation benchmark and verification\\n\";\n  std::cout << \"   Automatic differentiation tools = \";\n  bool is_first = true;\n  for (int i = 0; i < N_AUTODIFF_TOOLS; i++) {\n    if (use_tool[i]) {\n      if (!is_first) {\n\tstd::cout << \", \";\n      
}\n      else {\n\tis_first = false;\n      }\n      std::cout << autodiff_tool_long_string[i];\n    }\n  }\n  std::cout << \"\\n\";\n\n  std::cout << \"   Test algorithms = \";\n  is_first = true;\n  for (int i = 0; i < N_TEST_ALGORITHMS; i++) {\n    if (use_algorithm[i]) {\n      if (!is_first) {\n\tstd::cout << \", \";\n      }\n      else {\n\tis_first = false;\n      }\n      std::cout << test_algorithm_long_string[i];\n    }\n  }\n  std::cout << \"\\n\";\n\n  std::cout << \"   Number of x points = \" << NX << \"\\n\";\n  std::cout << \"   Number of timesteps = \" << nt << \", Courant number = \" << dt << \"\\n\";\n  if (!verify_only) {\n    std::cout << \"   Algorithm repeats = \" << nr << \", warm-up repeats = \" << nr_warm_up << \"\\n\";\n    std::cout << \"   Jacobian repeats = \" << nr_jacobian << \", warm-up repeats = \" << nr_jacobian_warm_up << \"\\n\";\n  }\n  else {\n    std::cout << \"   Verifying results only: no repeats\\n\";\n  }\n\n  std::cout << adept::configuration();\n\n  // Loop through test algorithms\n  for (int ialg = 0; ialg < N_TEST_ALGORITHMS; ialg++) {\n    if (use_algorithm[ialg]) {\n\n      std::string algorithm_string = test_algorithm_long_string[ialg];\n      std::cout << \"\\nRunning test algorithm \\\"\" << algorithm_string << \"\\\":\\n\";\n      \n      TestAlgorithm ta = static_cast<TestAlgorithm>(ialg);\n      \n      std::cout << \"   Hand coded (forward-mode Jacobian only)\\n\";\n      \n      HandCodedDifferentiator hand_coded_differentiator(timer, algorithm_string);\n      hand_coded_differentiator.initialize(nt, dt);\n      for (int i = 0; i < nr_warm_up; i++) {\n\thand_coded_differentiator.func(ta, q_init, q);\n\thand_coded_differentiator.adjoint(ta, q_init, q, q_AD, q_init_AD_reference);\n\thand_coded_differentiator.jacobian(ta, q_init, q, jac_reference);\n      }\n      hand_coded_differentiator.reset_timings();\n      for (int i = 0; i < nr; i++) {\n\thand_coded_differentiator.func(ta, q_init, 
q);\n\thand_coded_differentiator.adjoint(ta, q_init, q, q_AD, q_init_AD_reference);\n\thand_coded_differentiator.jacobian(ta, q_init, q, jac_reference);\n      }\n      \n      if (print_result) {\n\tstd::cout << \"      result = [\" << q[0];\n\tfor (int i = 1; i < NX; i++) {\n\t  std::cout << \", \" << q[i];\n\t}\n\tstd::cout << \"]\\n\";\n      }\n      \n      if (print_adjoint) {\n\tstd::cout << \"adjoint = [\" << q_init_AD_reference[0];\n\tfor (int i = 1; i < NX; i++) {\n\t  std::cout << \", \" << q_init_AD_reference[i];\n\t}\n\tstd::cout << \"]\\n\";\n      }\n      if (print_jacobian) {\n\tReal (&q_K)[NX][NX]\n\t  = *reinterpret_cast<Real(*)[NX][NX]>(&jac_reference[0]);\n\tstd::cout << \"jacobian = [\\n\";\n\tfor (int i = 0; i < NX; i++) {\n\t  std::cout << q_K[i][0];\n\t  for (int j = 1; j < NX; j++) {\n\t    std::cout << \", \" << q_K[i][j];\n\t}\n\t  std::cout << \"\\n\";\n\t}\n\tstd::cout << \"]\\n\";\n      }\n      \n      Real base_time = timer.timing(hand_coded_differentiator.base_timer_id());\n      \n      if (!verify_only) {\n\tstd::cout << \"      Time of original algorithm: \" << base_time << \" seconds\\n\";\n\tstd::cout << \"      Absolute time of adjoint: \" \n\t\t  << timer.timing(hand_coded_differentiator.adjoint_compute_timer_id())\n\t\t  << \" s\\n\";\n\tstd::cout << \"      Relative time of adjoint: \" \n\t\t  << timer.timing(hand_coded_differentiator.adjoint_compute_timer_id())\n\t  / base_time << \"\\n\";\n\tstd::cout << \"      Absolute time of Jacobian: \" \n\t\t  << timer.timing(hand_coded_differentiator.jacobian_timer_id())\n\t\t  << \" s\\n\";\n\tstd::cout << \"      Relative time of Jacobian: \" \n\t\t  << timer.timing(hand_coded_differentiator.jacobian_timer_id())\n\t  / base_time << \"\\n\";\n      }\n      \n      for (int itool = 0; itool < N_AUTODIFF_TOOLS; itool++) {\n\tif (use_tool[itool]) {\n\t  Differentiator* differentiator\n\t    = new_differentiator(static_cast<AutoDiffTool>(itool),\n\t\t\t\t timer, 
algorithm_string);\n\t  if (!differentiator) {\n\t    if (verbose) std::cout << \"Automatic differentiation tool with code \" << itool << \" not available\\n\";\n\t    continue;\n\t  }\n\t  \n\t  differentiator->initialize(nt, dt);\n\t  if (no_openmp) {\n\t    differentiator->no_openmp();\n\t  }\n\t  \n\t  std::cout << \"   \" << differentiator->name() << \"\\n\";\n\t  \n\t  if (test_algorithm_is_vector[ialg] && !differentiator->supports_vector_calls()) {\n\t    std::cout << \"     ...vector calls not supported\\n\";\n\t    delete differentiator;\n\t    continue;\n\t  }\n\n\t  for (int i = 0; i < nr_warm_up; i++) {\n\t    differentiator->adjoint(ta, q_init, q, q_AD, q_init_AD);\n\t  }\n\t  Real rms_verify = rms(q_init_AD, q_init_AD_reference);\n\t  if (rms_verify > tolerance) {\n\t    std::cout << \"      *** Adjoint RMS difference with hand-coded of \" << rms_verify << \" is greater than tolerance of \" << tolerance << \" ***\\n\";\n\t    if (print_adjoint) {\n\t      std::cout << \"adjoint_auto = [\" << q_init_AD[0];\n\t      for (int i = 1; i < NX; i++) {\n\t\tstd::cout << \", \" << q_init_AD[i];\n\t      }\n\t      std::cout << \"]\\n\";\n\t    }\n\n\t    verify_error = true;\n\t  }\n\t  else {\n\t    std::cout << \"      Adjoint RMS difference with hand-coded of \" << rms_verify << \" is within tolerance of \" << tolerance << \"\\n\";\n\t  }\n\n\t  for (int i = 0; i < nr_jacobian_warm_up; i++) {\n\t    differentiator->jacobian(ta, q_init, q, jac, force_jacobian);\n\t  }\n\t  rms_verify = rms(jac, jac_reference);\n\t  if (rms_verify > tolerance) {\n\t    std::cout << \"      *** Jacobian RMS difference with hand-coded of \" << rms_verify << \" is greater than tolerance of \" << tolerance << \" ***\\n\";\n\t    verify_error = true;\n\t  }\n\t  else {\n\t    std::cout << \"      Jacobian RMS difference with hand-coded of \" << rms_verify << \" is within tolerance of \" << tolerance << \"\\n\";\n\t  }\n\t  \n\t  \n\t  if (!verify_only) {\n\t    
differentiator->reset_timings();\n\t    for (int i = 0; i < nr; i++) {\n\t      differentiator->adjoint(ta, q_init, q, q_AD, q_init_AD);\n\t    }\n\n\t    Real relative_record_time = timer.timing(differentiator->base_timer_id())\n\t      / base_time;\n\t    Real relative_adjoint_time\n\t      = timer.timing(differentiator->adjoint_compute_timer_id())\n\t      / base_time;\n\t    Real relative_adjoint_prep_time\n\t      = timer.timing(differentiator->adjoint_prep_timer_id())\n\t      / base_time;\n\n\t    std::cout << \"      Absolute time of adjoint: \"\n\t\t      << timer.timing(differentiator->base_timer_id())\n\t      + timer.timing(differentiator->adjoint_compute_timer_id())\n\t      + timer.timing(differentiator->adjoint_prep_timer_id())\n\t\t      << \" s (\" \n\t\t      << timer.timing(differentiator->base_timer_id())\n\t\t      << \" s + \";\n\t    if (relative_adjoint_prep_time > 0.0) {\n\t      std::cout << timer.timing(differentiator->adjoint_prep_timer_id()) \n\t\t\t<< \" s + \";\n\t    }\n\t    std::cout <<  timer.timing(differentiator->adjoint_compute_timer_id())\n\t\t      << \" s)\\n\";\n\t    std::cout << \"      Relative time of adjoint: \"\n\t\t      << relative_record_time + relative_adjoint_prep_time\n\t      + relative_adjoint_time\n\t\t      << \" (\" << relative_record_time << \" + \";\n\t    if (relative_adjoint_prep_time > 0.0) {\n\t      std::cout << relative_adjoint_prep_time << \" + \";\n\t    }\n\t    std::cout << relative_adjoint_time << \")\\n\";\n\t    differentiator->reset_timings();\n\t  }\n\t  \n\t  for (int i = 0; i < nr_jacobian; i++) {\n\t    differentiator->jacobian(ta, q_init, q, jac, force_jacobian);\n\t  }\n\t  \n\t  if (print_jacobian) {\n\t    Real (&q_K)[NX][NX]\n\t      = *reinterpret_cast<Real(*)[NX][NX]>(&jac[0]);\n\t    std::cout << \"jacobian_auto = [\\n\";\n\t    for (int i = 0; i < NX; i++) {\n\t      std::cout << q_K[i][0];\n\t      for (int j = 1; j < NX; j++) {\n\t\tstd::cout << \", \" << q_K[i][j];\n\t      
}\n\t      std::cout << \"\\n\";\n\t    }\n\t    std::cout << \"]\\n\";\n\t  }\n\t  \n\t  if (!verify_only) {\n\t    Real relative_record_time = (nr*timer.timing(differentiator->base_timer_id()))\n\t      /(nr_jacobian*base_time);\n\t    Real relative_jacobian_time = (nr*timer.timing(differentiator->jacobian_timer_id()))\n\t      /(nr_jacobian*base_time);\n\t    Real relative_adjoint_prep_time = (nr*timer.timing(differentiator->adjoint_prep_timer_id()))\n\t      /(nr_jacobian*base_time);\n\t    std::cout << \"      Absolute time of Jacobian: \"\n\t\t      << timer.timing(differentiator->base_timer_id())\n\t      + timer.timing(differentiator->adjoint_prep_timer_id())\n\t      + timer.timing(differentiator->jacobian_timer_id())\n\t\t      << \" s (\"\n\t\t      << timer.timing(differentiator->base_timer_id()) \n\t\t      << \" s + \";\n\t    if (relative_adjoint_prep_time > 0.0) {\n\t      std::cout << timer.timing(differentiator->adjoint_prep_timer_id())\n\t\t\t<< \" s + \";\n\t    }\n\t    std::cout << timer.timing(differentiator->jacobian_timer_id())\n\t\t      << \" s)\\n\";\n\t    std::cout << \"      Relative time of Jacobian: \"\n\t\t      << relative_record_time + relative_adjoint_prep_time + relative_jacobian_time\n\t\t      << \" (\" << relative_record_time << \" + \";\n\t    if (relative_adjoint_prep_time > 0.0) {\n\t      std::cout << relative_adjoint_prep_time << \" + \";\n\t    }\n\t    std::cout << relative_jacobian_time << \")\\n\";\n\t  }\n\t  differentiator->print();\n\t  delete differentiator;\n\t}\n      }\n    }\n  }\n  if (verify_error) {\n    std::cout << \"\\nEXITING WITH ERROR CODE 1: ONE OR MORE OF THE AUTOMATIC DIFFERENTIATION\\n\"\n\t      << \"TOOLS DID NOT REPRODUCE THE HAND-CODING RESULT\\n\";\n    return 1;\n  }\n  else {\n    std::cout << \"\\nAll tests were passed within tolerance\\n\";\n    return 0;\n  }\n}\n\n"
  },
  {
    "path": "benchmark/differentiator.h",
    "content": "/* differentiator.h\n\n  Copyright (C) 2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#ifdef HAVE_CONFIG_H\n#include \"config.h\"\n#endif\n\n#include <iostream>\n#include <vector>\n#include <exception>\n#include <cmath>\n#include <string>\n\n#include \"Timer.h\"\n\n#include \"adept.h\"\n\nusing adept::Real;\n\n#ifdef HAVE_ADOLC\n// Note that ADOL-C places the \"adouble\" type in the global namespace\n#include \"adolc/adolc.h\"\n#endif\n\n#ifdef HAVE_CPPAD\n#include \"cppad/cppad.hpp\"\n#endif\n\n#ifdef HAVE_SACADO\n#include \"Sacado.hpp\"\n#endif\n\n#include \"advection_schemes.h\"\n#include \"advection_schemes_AD.h\"\n#include \"advection_schemes_K.h\"\n\n\nenum TestAlgorithm {\n  TEST_ALGORITHM_LAX_WENDROFF = 0,\n  TEST_ALGORITHM_TOON = 1,\n  TEST_ALGORITHM_LAX_WENDROFF_VECTOR = 2,\n  TEST_ALGORITHM_TOON_VECTOR = 3,\n  N_TEST_ALGORITHMS\n};\n\nconst char* test_algorithm_long_string[] = {\"Lax-Wendroff\", \"Toon et al.\",\n\t\t\t\t\t    \"Lax-Wendroff vector\", \"Toon et al. 
vector\"};\nconst char* test_algorithm_string[] = {\"lw\",\"toon\",\"lw_vector\", \"toon_vector\"};\n\nconst bool test_algorithm_is_vector[] = {false, false, true, true};\n\ninline\nstd::string\ntest_algorithms()\n{\n  std::string algs = test_algorithm_string[0];\n  for (int i = 1; i < N_TEST_ALGORITHMS; i++) {\n    algs += \",\";\n    algs += test_algorithm_string[i];\n  }\n  return algs;\n}\n\n\nclass differentiator_exception : public std::exception {\npublic:\n  differentiator_exception(const char* message = \"An error occurred in differentiator.h\")\n  { message_ = message; }\n  virtual const char* what() const throw()\n  { return message_; }\nprotected:\n  const char* message_;\n};\n\n// Base class from which specialist differentiators (hand-coded,\n// Adept, ADOL-C etc) inherit\nclass Differentiator {\npublic:\n  Differentiator(Timer& timer) \n    : timer_(timer) {\n    initialize(2000, 0.125); \n  }\n\n  virtual ~Differentiator() { }\n\n  virtual void print() { }\n\n  void initialize(int nt, Real c) {\n    nt_ = nt;\n    c_ = c;\n  }\n\n  virtual bool supports_vector_calls() { return false; }\n  \n  // Call the function to be differentiated, with the active type\n  // provided as a template argument\n  template <class ActiveRealType>\n  void func(TestAlgorithm test_algorithm,\n\t    const std::vector<ActiveRealType>& x,\n\t    std::vector<ActiveRealType>& y) {\n    timer_.start(base_timer_id_);\n    if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) {\n      lax_wendroff(nt_, c_, &x[0], &y[0]);\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON) {\n      toon(nt_, c_, &x[0], &y[0]);\n    }\n    timer_.stop();\n  }\n\n  virtual bool adjoint(TestAlgorithm test_algorithm,\n\t\t       const std::vector<Real>& x,\n\t\t       std::vector<Real>& y,\n\t\t       const std::vector<Real>& y_AD,\n\t\t       std::vector<Real>& x_AD) {\n    return false;\n  }\n\n  virtual bool jacobian(TestAlgorithm test_algorithm,\n\t\t\tconst std::vector<Real>& 
x,\n\t\t\tstd::vector<Real>& y,\n\t\t\tstd::vector<Real>& jac,\n\t\t\tint force_jacobian = 0) {\n    return false;\n  }\n\n  void reset_timings() {\n    timer_.reset(base_timer_id_);\n    timer_.reset(adjoint_prep_timer_id_);\n    timer_.reset(adjoint_compute_timer_id_);\n    timer_.reset(jacobian_timer_id_);\n  }\n\n  virtual std::string name() const = 0; //{ return \"GENERIC\"; }\n\n  virtual void no_openmp() { }\n\n  int base_timer_id() const { return base_timer_id_; }\n  int adjoint_prep_timer_id() const { return adjoint_prep_timer_id_; }\n  int adjoint_compute_timer_id() const { return adjoint_compute_timer_id_; }\n  int jacobian_timer_id() const { return jacobian_timer_id_; }\n\nprotected:\n  void init_timer(const std::string name_) {\n    base_timer_id_ = timer_.new_activity(name() + \" | \" + name_ + \" | record\");\n    adjoint_prep_timer_id_ = timer_.new_activity(name() + \" | \" + name_ + \" | adjoint prep\");\n    adjoint_compute_timer_id_ = timer_.new_activity(name() + \" | \" + name_ + \" | adjoint compute\");\n    jacobian_timer_id_ = timer_.new_activity(name() + \" | \" + name_ + \" | Jacobian\");\n  }\n\nprotected:\n  Timer& timer_;\n  int nt_; // Number of timesteps to run\n  Real c_;  // Courant number\n  int base_timer_id_;\n  int adjoint_prep_timer_id_;\n  int adjoint_compute_timer_id_;\n  int jacobian_timer_id_;\n};\n\n// ================= HAND CODED ===========================\n#include \"advection_schemes_AD.h\"\n\nclass HandCodedDifferentiator\n  : public Differentiator {\npublic:\n  HandCodedDifferentiator(Timer& timer, const std::string& name_)\n    : Differentiator(timer) {\n    init_timer(name_);\n  }\n\n  virtual bool supports_vector_calls() { return true; }\n  \n  virtual bool adjoint(TestAlgorithm test_algorithm,\n\t\t       const std::vector<Real>& x,\n\t\t       std::vector<Real>& y,\n\t\t       const std::vector<Real>& y_AD,\n\t\t       std::vector<Real>& x_AD) {\n    if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) {\n      
timer_.start(adjoint_compute_timer_id_);\n      lax_wendroff_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]);\n      timer_.stop();\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON) {\n      timer_.start(adjoint_compute_timer_id_);\n      toon_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]);\n      timer_.stop();\n    }\n    else if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF_VECTOR) {\n      timer_.start(adjoint_compute_timer_id_);\n      lax_wendroff_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]);\n      timer_.stop();\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON_VECTOR) {\n      timer_.start(adjoint_compute_timer_id_);\n      toon_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]);\n      timer_.stop();\n    }\n    else {\n      std::cerr << \"Algorithm not found: \" << test_algorithm << \"\\n\";\n      return false;\n    }\n    return true;\n  }\n\n  virtual bool jacobian(TestAlgorithm test_algorithm,\n\t\t\tconst std::vector<Real>& x,\n\t\t\tstd::vector<Real>& y,\n\t\t\tstd::vector<Real>& jac,\n\t\t\tint force_jacobian = 0) {\n    jac.resize(NX*NX);\n    if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) {\n      timer_.start(jacobian_timer_id_);\n      lax_wendroff_K(nt_, c_, &x[0], &y[0], &jac[0]);\n      timer_.stop();\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON) {\n      timer_.start(jacobian_timer_id_);\n      toon_K(nt_, c_, &x[0], &y[0], &jac[0]);\n      timer_.stop();\n    }\n    else if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF_VECTOR) {\n      timer_.start(jacobian_timer_id_);\n      lax_wendroff_K(nt_, c_, &x[0], &y[0], &jac[0]);\n      timer_.stop();\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON_VECTOR) {\n      timer_.start(jacobian_timer_id_);\n      toon_K(nt_, c_, &x[0], &y[0], &jac[0]);\n      timer_.stop();\n    }\n    else {\n      std::cerr << \"Algorithm not found: \" << test_algorithm << \"\\n\";\n      return false;\n    }\n    return true;\n  }\n\n  virtual std::string name() const { 
return \"Hand coded\"; }\n};\n\n\n\n// ================= ADEPT ================================ \n\nclass AdeptDifferentiator\n  : public Differentiator {\npublic:\n  AdeptDifferentiator(Timer& timer, const std::string& name_)\n    : Differentiator(timer) { init_timer(name_); }\n\n  virtual ~AdeptDifferentiator() { }\n\n  virtual bool supports_vector_calls() { return true; }\n  \n  // Need to overload the function in the base class, because only\n  // Adept supports the _VECTOR versions of the algorithms\n  template <class ActiveRealType>\n  void func(TestAlgorithm test_algorithm,\n\t    const std::vector<ActiveRealType>& x,\n\t    std::vector<ActiveRealType>& y) {\n    timer_.start(base_timer_id_);\n    if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) {\n      lax_wendroff(nt_, c_, &x[0], &y[0]);\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON) {\n      toon(nt_, c_, &x[0], &y[0]);\n    }\n    else if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF_VECTOR) {\n      lax_wendroff_vector(nt_, c_, &x[0], &y[0]);\n    }\n    else if (test_algorithm == TEST_ALGORITHM_TOON_VECTOR) {\n      toon_vector(nt_, c_, &x[0], &y[0]);\n    }\n    timer_.stop();\n  }\n\n  virtual bool adjoint(TestAlgorithm test_algorithm,\n\t\t       const std::vector<Real>& x,\n\t\t       std::vector<Real>& y,\n\t\t       const std::vector<Real>& y_AD,\n\t\t       std::vector<Real>& x_AD) {\n    if (x.size() != NX || y_AD.size() != NX) {\n      throw differentiator_exception(\"One of input vectors not of size NX in call to AdeptDifferentiator::adjoint\");\n    }\n    y.resize(NX);\n    x_AD.resize(NX);\n\n    std::vector<adept::aReal> q_init(NX);\n    std::vector<adept::aReal> q(NX);\n\n    adept::set_values(&q_init[0], NX, &x[0]);\n\n    stack_.new_recording();\n    func(test_algorithm, q_init, q);\n\n    timer_.start(adjoint_compute_timer_id_);\n\n    adept::set_gradients(&q[0], NX, &y_AD[0]);\n    stack_.compute_adjoint();\n    adept::get_gradients(&q_init[0], NX, &x_AD[0]);\n\n    
timer_.stop();\n\n    return true;\n  }\n\n\n  virtual bool jacobian(TestAlgorithm test_algorithm,\n\t\t\tconst std::vector<Real>& x,\n\t\t\tstd::vector<Real>& y,\n\t\t\tstd::vector<Real>& jac,\n\t\t\tint force_jacobian = 0) {\n    if (x.size() != NX) {\n      throw differentiator_exception(\"Input vector x not of size NX in call to AdeptDifferentiator::jacobian\");\n    }\n    y.resize(NX);\n    jac.resize(NX*NX);\n\n    std::vector<adept::aReal> q_init(NX);\n    std::vector<adept::aReal> q(NX);\n\n    adept::set_values(&q_init[0], NX, &x[0]);\n\n    stack_.new_recording();\n    func(test_algorithm, q_init, q);\n\n    stack_.independent(&q_init[0], NX);\n    stack_.dependent(&q[0], NX);\n\n    timer_.start(jacobian_timer_id_);\n    if (force_jacobian > 0) {\n      stack_.jacobian_forward(&jac[0]);\n    }\n    else if (force_jacobian < 0) {\n      stack_.jacobian_reverse(&jac[0]);\n    }\n    else {\n      stack_.jacobian(&jac[0]);\n    }\n    timer_.stop();\n    return true;\n  }\n\n  virtual std::string name() const {\n    std::stringstream name_;\n    name_ << \"Adept\";\n    int nthread = stack_.max_jacobian_threads();\n    if (nthread > 1) {\n      name_ << \" (Jacobian using up to \" << nthread << \" OpenMP threads)\";\n    }\n    else {\n      name_ << \" (single threaded)\";\n    }\n    return name_.str(); \n  }\n\n  virtual void no_openmp() { \n    stack_.set_max_jacobian_threads(1);\n  }\n\n  virtual void print() {\n    std::cout << \"========== ADEPT STACK BEGIN ==========\\n\";\n    std::cout << stack_;\n    std::cout << \"========== ADEPT STACK END ============\\n\";\n  }\n\nprivate:\n  adept::Stack stack_;\n};\n\n \n\n#ifdef HAVE_ADOLC\n\n// ================= ADOLC ================================ \n\nclass AdolcDifferentiator\n  : public Differentiator {\npublic:\n  AdolcDifferentiator(Timer& timer, const std::string& name_)\n    : Differentiator(timer), jac(0), I(0), result(0) { init_timer(name_); }\n\n  // Note that ADOL-C places the \"adouble\" 
type in the global namespace\n  typedef adouble aReal;\n  \n  virtual ~AdolcDifferentiator() {\n    if (I) {\n      myfreeI2(NX, I);\n    }\n    if (jac) {\n      myfree2(jac);\n    }\n    if (result) {\n      myfree1(result);\n    }\n  }\n\n  virtual bool adjoint(TestAlgorithm test_algorithm,\n\t\t       const std::vector<Real>& x,\n\t\t       std::vector<Real>& y,\n\t\t       const std::vector<Real>& y_AD,\n\t\t       std::vector<Real>& x_AD) {\n    if (x.size() != NX || y_AD.size() != NX) {\n      throw differentiator_exception(\"One of input vectors not of size NX in call to AdolcDifferentiator::adjoint\");\n    }\n    y.resize(NX);\n    x_AD.resize(NX);\n\n    std::vector<aReal> q_init(NX);\n    std::vector<aReal> q(NX);\n\n    trace_on(1,1);\n\n    for (int i = 0; i < NX; i++) {\n      q_init[i] <<= x[i];\n    }\n\n    func(test_algorithm, q_init, q);\n\n    for (int i = 0; i < NX; i++) {\n      q[i] >>= y[i];\n    }\n\n    trace_off();\n\n    timer_.start(adjoint_compute_timer_id_);\n\n    reverse(1, NX, NX, 0, const_cast<Real*>(&y_AD[0]), &x_AD[0]);                                                \n\n    timer_.stop();\n    return true;\n  }\n\n\n  virtual bool jacobian(TestAlgorithm test_algorithm,\n\t\t\tconst std::vector<Real>& x,\n\t\t\tstd::vector<Real>& y,\n\t\t\tstd::vector<Real>& jac_,\n\t\t\tint force_jacobian = 0) {\n    if (x.size() != NX) {\n      throw differentiator_exception(\"Input vector x not of size NX in call to AdolcDifferentiator::jacobian\");\n    }\n    y.resize(NX);\n    jac_.resize(NX*NX);\n\n    std::vector<aReal> q_init(NX);\n    std::vector<aReal> q(NX);\n\n    trace_on(1,1);\n\n    for (int i = 0; i < NX; i++) {\n      q_init[i] <<= x[i];\n    }\n\n    func(test_algorithm, q_init, q);\n\n    for (int i = 0; i < NX; i++) {\n      q[i] >>= y[i];\n    }\n\n    trace_off();\n\n    if (!jac) {\n      jac = myalloc2(NX,NX);\n      I = myallocI2(NX);\n      result = myalloc1(NX);\n    }\n\n    timer_.start(jacobian_timer_id_);\n\n    
if (force_jacobian < 0) {\n      int rc = zos_forward(1, NX, NX, 1, &x[0], result);\n      if (rc < 0) {\n\tthrow differentiator_exception(\"Error occurred ADOL-C's zos_forward()\");\n      }\n      MINDEC(rc,fov_reverse(1, NX, NX, NX, I, jac));\n    }\n    else if (force_jacobian > 0) {\n      int rc = fov_forward(1, NX, NX, NX, &x[0], I, result, jac);\n      if (rc < 0) {\n\tthrow differentiator_exception(\"Error occurred ADOL-C's fov_forward()\");\n      }\n    }\n    else {\n      ::jacobian(1, NX, NX, &x[0], jac);\n    }\n\n    timer_.stop();\n\n    for (int j=0, index=0; j < NX; j++) {\n      for (int i=0; i < NX; i++, index++) {\n\tjac_[index] = jac[i][j];\n      }\n    }\n    return true;\n  }\n\n  virtual std::string name() const { return \"ADOL-C\"; }\n\nprivate:\n  Real** jac;\n  Real** I;\n  Real* result;\n};\n\n#endif // HAVE_ADOLC\n\n\n#ifdef HAVE_CPPAD\n\n// ================= CPPAD ================================ \n\nclass CppadDifferentiator\n  : public Differentiator {\npublic:\n  typedef CppAD::AD<Real> aReal;\n\n  CppadDifferentiator(Timer& timer, const std::string& name_)\n    : Differentiator(timer) {\n    init_timer(name_); \n    CppAD::thread_alloc::hold_memory(true);\n  }\n    \n  virtual ~CppadDifferentiator() { }\n  \n  virtual bool adjoint(TestAlgorithm test_algorithm,\n\t\t       const std::vector<Real>& x,\n\t\t       std::vector<Real>& y,\n\t\t       const std::vector<Real>& y_AD,\n\t\t       std::vector<Real>& x_AD) {\n    if (x.size() != NX || y_AD.size() != NX) {\n      throw differentiator_exception(\"One of input vectors not of size NX in call to CppadDifferentiator::adjoint\");\n    }\n    y.resize(NX);\n    x_AD.resize(NX);\n\n    std::vector<aReal> q_init(NX);\n    std::vector<aReal> q(NX);\n\n    for (int i = 0; i < NX; i++) {\n      q_init[i] = x[i];\n    }\n\n    CppAD::Independent(q_init);\n\n    func(test_algorithm, q_init, q);\n\n    for (int i = 0; i < NX; i++) {\n      y[i] = CppAD::Value(q[i]);\n    }\n\n    
timer_.start(adjoint_prep_timer_id_);\n    CppAD::ADFun<Real> f(q_init, q);\n\n    timer_.start(adjoint_compute_timer_id_);\n    x_AD = f.Reverse(1, y_AD);\n    timer_.stop();\n\n    return true;\n  }\n\n  virtual bool jacobian(TestAlgorithm test_algorithm,\n\t\t\tconst std::vector<Real>& x,\n\t\t\tstd::vector<Real>& y,\n\t\t\tstd::vector<Real>& jac,\n\t\t\tint force_jacobian = 0) {\n    if (x.size() != NX) {\n      throw differentiator_exception(\"Input vector x not of size NX in call to CppadDifferentiator::jacobian\");\n    }\n    y.resize(NX);\n    jac.resize(NX*NX);\n    jac_transpose_.resize(NX*NX);\n\n    std::vector<aReal> q_init(NX);\n    std::vector<aReal> q(NX);\n\n    for (int i = 0; i < NX; i++) {\n      q_init[i] = x[i];\n    }\n\n    CppAD::Independent(q_init);\n\n    func(test_algorithm, q_init, q);\n\n    for (int i = 0; i < NX; i++) {\n      y[i] = CppAD::Value(q[i]);\n    }\n\n    timer_.start(adjoint_prep_timer_id_);\n    CppAD::ADFun<Real> f(q_init, q);\n\n    timer_.start(jacobian_timer_id_);\n\n    if (force_jacobian < 0) {\n      CppAD::JacobianRev(f, x, jac_transpose_);\n    }\n    else if (force_jacobian > 0) {\n      CppAD::JacobianFor(f, x, jac_transpose_);\n    } \n    else {\n      jac_transpose_ = f.Jacobian(x);\n    }\n\n    // Transpose Jacobian because CppAD uses the opposite convention to the other tools\n    Real (&jac_transpose2)[NX][NX]\n      = *reinterpret_cast<Real(*)[NX][NX]>(&jac_transpose_[0]);\n    for (int i = 0, index = 0; i < NX; i++) {\n      for (int j = 0; j < NX; j++, index++) {\n\tjac[index] = jac_transpose2[j][i];\n      }\n    }\n\n    return true;\n  }\n\n  virtual std::string name() const { return \"CppAD\"; }\n\nprivate:\n  std::vector<Real> jac_transpose_;\n};\n\n#endif // HAVE_CPPAD\n\n\n#ifdef HAVE_SACADO\n\n// ================= SACADO ================================ \n\ntemplate<> int Sacado::Rad::ADmemblock<Real>::n_blocks = 0;\n\nclass SacadoDifferentiator\n  : public Differentiator {\npublic:\n  
typedef Sacado::Rad::ADvar<Real> aReal;\n  typedef Sacado::ELRFad::DFad<Real> aReal_fad;\n\n  SacadoDifferentiator(Timer& timer, const std::string& name_)\n    : Differentiator(timer) { init_timer(name_); }\n    \n  virtual ~SacadoDifferentiator() { }\n  \n  virtual bool adjoint(TestAlgorithm test_algorithm,\n\t\t       const std::vector<Real>& x,\n\t\t       std::vector<Real>& y,\n\t\t       const std::vector<Real>& y_AD,\n\t\t       std::vector<Real>& x_AD) {\n    if (x.size() != NX || y_AD.size() != NX) {\n      throw differentiator_exception(\"One of input vectors not of size NX in call to SacadoDifferentiator::adjoint\");\n    }\n    y.resize(NX);\n    x_AD.resize(NX);\n\n    std::vector<aReal> q_init(NX);\n    std::vector<aReal> q(NX);\n\n    for (int i = 0; i < NX; i++) {\n      q_init[i] = x[i];\n    }\n\n    func(test_algorithm, q_init, q);\n\n    for (int i = 0; i < NX; i++) {\n      y[i] = q[i].val();\n    }\n\n    timer_.start(base_timer_id_);\n    aReal objective_func = 0.0;\n    for (int i = 0; i < NX; i++) {\n      objective_func += q[i] * y_AD[i];\n    }\n\n    timer_.start(adjoint_compute_timer_id_);\n    Sacado::Rad::ADvar<Real>::Gradcomp();\n    for (int i = 0; i < NX; i++) { \n      x_AD[i] = q_init[i].adj();\n    }\n    timer_.stop();\n\n    return true;\n  }  \n\n\n  virtual bool jacobian(TestAlgorithm test_algorithm,\n\t\t\tconst std::vector<Real>& x,\n\t\t\tstd::vector<Real>& y,\n\t\t\tstd::vector<Real>& jac,\n\t\t\tint force_jacobian = 0) {\n    if (x.size() != NX) {\n      throw differentiator_exception(\"Input vector x not of size NX in call to SacadoDifferentiator::jacobian\");\n    }\n    y.resize(NX);\n    jac.resize(NX*NX);\n\n    std::vector<aReal_fad> q_init(NX);\n    std::vector<aReal_fad> q(NX);\n\n    for (int i = 0; i < NX; i++) {\n      q_init[i] = x[i];\n      q_init[i].resize(NX);\n      q[i].resize(NX);\n      q_init[i].fastAccessDx(i) = 1.0;\n    }\n\n    func(test_algorithm, q_init, q);\n\n    for (int i = 0; i < NX; i++) 
{\n      y[i] = q[i].val();\n    }\n            \n    int index = 0;\n    for (int i = 0; i < NX; i++) { \n      for (int k = 0; k < NX; k++, index++) {\n\tjac[index] = q[k].dx(i);\n      }\n    }\n    return true;\n  }\n\n  virtual std::string name() const { return \"Sacado (::Rad for adjoint, forward-mode only ::ELRFad for Jacobian)\"; }\n};\n\n#endif // HAVE_SACADO\n\n\n\n\n// The following enum is designed to be used in a \"for\" loop to loop\n// through the available automatic differentiaion tools\nenum AutoDiffTool {\n  AUTODIFF_TOOL_ADEPT = 0\n#ifdef HAVE_ADOLC\n  , AUTODIFF_TOOL_ADOLC\n#endif\n#ifdef HAVE_CPPAD\n  , AUTODIFF_TOOL_CPPAD\n#endif\n#ifdef HAVE_SACADO\n  , AUTODIFF_TOOL_SACADO\n#endif\n  , N_AUTODIFF_TOOLS\n};\n\nconst char* autodiff_tool_string[] = {\n  \"adept\"\n#ifdef HAVE_ADOLC\n  , \"adolc\"\n#endif\n#ifdef HAVE_CPPAD\n  , \"cppad\"\n#endif\n#ifdef HAVE_SACADO\n  , \"sacado\"\n#endif\n};\n\nconst char* autodiff_tool_long_string[] = {\n  \"Adept\"\n#ifdef HAVE_ADOLC\n  , \"ADOL-C\"\n#endif\n#ifdef HAVE_CPPAD\n  , \"CppAD\"\n#endif\n#ifdef HAVE_SACADO\n  , \"Sacado\"\n#endif\n};\n\ninline\nstd::string\nautodiff_tools()\n{\n  std::string tools = autodiff_tool_string[0];\n  for (int i = 1; i < N_AUTODIFF_TOOLS; i++) {\n    tools += \",\";\n    tools += autodiff_tool_string[i];\n  }\n  return tools;\n}\n\n\n// Return pointer to a virtual base object Differentiator\ninline\nDifferentiator* \nnew_differentiator(AutoDiffTool auto_diff_tool, Timer& timer, const std::string& name_)\n{\n  if (auto_diff_tool == AUTODIFF_TOOL_ADEPT) {\n    return new AdeptDifferentiator(timer, name_);\n  }\n#ifdef HAVE_ADOLC\n  else if (auto_diff_tool == AUTODIFF_TOOL_ADOLC) {\n    return new AdolcDifferentiator(timer, name_);\n  }\n#endif\n#ifdef HAVE_CPPAD\n  else if (auto_diff_tool == AUTODIFF_TOOL_CPPAD) {\n    return new CppadDifferentiator(timer, name_);\n  }\n#endif\n#ifdef HAVE_SACADO\n  else if (auto_diff_tool == AUTODIFF_TOOL_SACADO) {\n    return new 
SacadoDifferentiator(timer, name_);\n  }\n#endif\n  else {\n    return 0;\n  }\n}\n"
  },
  {
    "path": "benchmark/math_benchmark.cpp",
    "content": "/* math_benchmark.cpp - Benchmark mathematical functions\n\n  Copyright (C) 2023 ECMWF\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n*/\n\n#include <iostream>\n#include <adept_arrays.h>\n#include \"Timer.h\"\n\nint main(int argc, const char** argv)\n{\n  using namespace adept;\n  static const int N = 1024;\n  int nrepeat = 1024*16;\n  Vector x(N), y(N);\n\n  Timer timer;\n  timer.print_on_exit(true);\n  int add_id = timer.new_activity(\"addition\");\n  int sub_id = timer.new_activity(\"subtraction\");\n  int mul_id = timer.new_activity(\"multiplication\");\n  int div_id = timer.new_activity(\"division\");\n  int exp_id = timer.new_activity(\"exp\");\n  int fastexp_id = timer.new_activity(\"fastexp\");\n  int log_id = timer.new_activity(\"log\");\n  int sin_id = timer.new_activity(\"sin\");\n\n  x = 1.001;\n  y = x*x;\n  y = 0.0;\n  \n  timer.start(add_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y += x;\n  }\n  timer.stop();\n\n  y = 0.0;\n  \n  timer.start(sub_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y -= x;\n  }\n  timer.stop();\n\n  y = 1.0;\n  \n  timer.start(mul_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y *= x;\n  }\n  timer.stop();\n\n  std::cout << \"y=\" << y(0) << \"\\n\";\n  \n  timer.start(div_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y /= x;\n  }\n  timer.stop();\n\n  x = 0.001;\n  \n  timer.start(exp_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y = exp(x);\n    x = y-1.001;\n  }\n  timer.stop();\n\n  std::cout << \"y=\" << y(0) << \"\\n\";\n  \n  x = 0.001;\n\n \n  timer.start(fastexp_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y = fastexp(x);\n    x = y-1.001;\n  }\n  timer.stop();\n\n  std::cout << 
\"y=\" << y(0) << \"\\n\";\n  \n  x = 1.001;\n  \n  timer.start(log_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y = log(x);\n    x = y+1.0;\n  }\n  timer.stop();\n\n  std::cout << \"y=\" << y(0) << \"\\n\";\n  \n  x = 1.001;\n  \n  timer.start(sin_id);\n  for (int irepeat = 0; irepeat < nrepeat; ++irepeat) {\n    y = sin(x);\n    y = x;\n  }\n  timer.stop();\n\n  std::cout << \"y=\" << y(0) << \"\\n\";\n\n  std::cout << \"RELATIVE COSTS\\n\";\n  std::cout << \"div/mul = \" << timer.timing(div_id)/timer.timing(mul_id) << \"\\n\";\n  std::cout << \"exp/mul = \" << timer.timing(exp_id)/timer.timing(mul_id) << \"\\n\";\n  std::cout << \"fastexp/mul = \" << timer.timing(fastexp_id)/timer.timing(mul_id) << \"\\n\";\n  std::cout << \"log/mul = \" << timer.timing(log_id)/timer.timing(mul_id) << \"\\n\";\n  std::cout << \"sin/mul = \" << timer.timing(sin_id)/timer.timing(mul_id) << \"\\n\";\n  \n}\n"
  },
  {
    "path": "benchmark/matrix_benchmark.cpp",
    "content": "#include <iostream>\n\n#include <adept_arrays.h>\n\n#include \"Timer.h\"\n\ntemplate<bool IsActive>\ndouble\ntime_matmul(int n, int nrepeat, bool is_col_major)\n{\n  adept::Array<2,double,IsActive> A, B, C;\n  Timer timer;\n  int matmul_timer_id = timer.new_activity(\"matmul\");\n  if (is_col_major) {\n    A.resize_column_major(adept::expression_size(n,n));\n    B.resize_column_major(adept::expression_size(n,n));\n    C.resize_column_major(adept::expression_size(n,n));\n  }\n  else {\n    A.resize(n,n);\n    B.resize(n,n);\n    C.resize(n,n);\n  }\n  for (int irepeat = -nrepeat/10; irepeat < nrepeat; ++irepeat) {\n    A = 1.1;\n    B = 2.2;\n    A.diag_vector() = 3.3;\n    B.diag_vector() = 5.5;\n    if (IsActive) {\n      adept::active_stack()->new_recording();\n    }\n    if (irepeat >= 0) {\n      timer.start(matmul_timer_id);\n    }\n    C = A ** B;\n    if (irepeat >= 0) {\n      timer.stop();\n    }\n  }\n  /*\n  if (IsActive && n < 8) {\n    std::cout << \"C=\" << C;\n    std::cout << *adept::active_stack();\n    adept::active_stack()->print_statements();\n  }\n  */\n  return timer.timing(matmul_timer_id) / nrepeat;\n}\n\ndouble\ntime_solve(int n, int nrepeat, bool is_col_major)\n{\n  adept::Matrix A, B, C;\n  Timer timer;\n  int solve_timer_id = timer.new_activity(\"solve\");\n  if (is_col_major) {\n    A.resize_column_major(adept::expression_size(n,n));\n    B.resize_column_major(adept::expression_size(n,n));\n    C.resize_column_major(adept::expression_size(n,n));\n  }\n  else {\n    A.resize(n,n);\n    B.resize(n,n);\n    C.resize(n,n);\n  }\n  for (int irepeat = -nrepeat/10; irepeat < nrepeat; ++irepeat) {\n    A = 1.1;\n    B = 2.2;\n    A.diag_vector() = 3.3;\n    B.diag_vector() = 5.5;\n    if (irepeat >= 0) {\n      timer.start(solve_timer_id);\n    }\n    C = adept::solve(A, B);\n    if (irepeat >= 0) {\n      timer.stop();\n    }\n  }\n\n  return timer.timing(solve_timer_id) / nrepeat;\n}\n\n\nint\nmain(int argc, char* argv[])\n{\n 
 int ibegin = 1;\n  int iend = 8;\n  int nrepeat = 20;\n  bool is_col_major = false;\n\n  adept::Stack stack;\n  int n = 2;\n  std::cout << \"Average cost per operation (\" << nrepeat << \" repeats)\\n\";\n  std::cout << \"Dense N-by-N matrix-matrix multiplication\\n\";\n  //std::cout << \" N        inactive time (us)   inactive flops    active time (us)    active flops\\n\";\n  std::cout << \"N \\tinactive time (us) \\tactive time (us)\\n\";\n  for (int i = ibegin; i <= iend; ++i) {\n    std::cout << n << \" \\t\";\n\n    double t = time_matmul<false>(n, nrepeat, is_col_major);\n    //    std::cout << t*1.0e6 << \"  \" << (n*n*n) / t << \"  \";\n    std::cout << t*1.0e6/nrepeat << \" \\t\\t\\t\";\n\n    t = time_matmul<true>(n, nrepeat, is_col_major);\n    //    std::cout << t*1.0e6 << \"  \" << (n*n*n) / t;\n    std::cout << t*1.0e6/nrepeat;\n\n    std::cout << \"\\n\";\n\n    n *= 2;\n  }\n  \n  n = 2;\n  std::cout << \"Dense N-by-N matrix-matrix solve\\n\";\n  std::cout << \"N \\tinactive time (us)\\n\";\n  for (int i = ibegin; i <= iend; ++i) {\n    std::cout << n << \" \\t\";\n\n    double t = time_solve(n, nrepeat, is_col_major);\n    //    std::cout << t*1.0e6 << \"  \" << (n*n*n) / t << \"  \";\n    std::cout << t*1.0e6/nrepeat << \"\\n\";\n\n    n *= 2;\n  }\n  return 0;\n}\n"
  },
  {
    "path": "benchmark/nx.h",
    "content": "#ifndef NX\n#define NX 100\n#endif\n"
  },
  {
    "path": "config_platform_independent.h.in",
    "content": "/* config_platform_independent.h.in. */\n\n/* Name of package */\n#undef PACKAGE\n\n/* Define to the address where bug reports for this package should be sent. */\n#undef PACKAGE_BUGREPORT\n\n/* Define to the full name of this package. */\n#undef PACKAGE_NAME\n\n/* Define to the full name and version of this package. */\n#undef PACKAGE_STRING\n\n/* Define to the one symbol short name of this package. */\n#undef PACKAGE_TARNAME\n\n/* Define to the home page for this package. */\n#undef PACKAGE_URL\n\n/* Define to the version of this package. */\n#undef PACKAGE_VERSION\n\n/* Version number of package */\n#undef VERSION\n\n"
  },
  {
    "path": "configure.ac",
    "content": "# Configure autoconf for the Adept library\n\n### GENERAL CONFIGURATION ###\n\nAC_PREREQ([2.61])\nAC_INIT([adept], [2.1.3], [r.j.hogan@ecmwf.int], [adept], [http://www.met.reading.ac.uk/clouds/adept/])\nAC_LANG([C++])\nAC_CONFIG_SRCDIR([adept/Stack.cpp])\nAC_CONFIG_HEADERS([config.h config_platform_independent.h])\nAM_INIT_AUTOMAKE([foreign -Wall -Werror])\nAC_CONFIG_MACRO_DIR([m4])\n\n# Checks for programs\nAC_PROG_CXX\nAC_PROG_F77\nAC_PROG_MAKE_SET\nm4_ifdef([AM_PROG_AR],[AM_PROG_AR])\nAC_PROG_LIBTOOL\n\n# Check for system features\nAC_CHECK_HEADERS([sys/time.h])\nAC_CHECK_FUNCS([gettimeofday pow sqrt])\n\n# Check for OpenMP\nAC_OPENMP\nAC_SUBST(AM_CXXFLAGS,\"$OPENMP_CXXFLAGS\")\n\n#### LIBRARIES NEEDED BY ADEPT ###\n\nif test \"x$F77\" = x\nthen\n\tAC_MSG_NOTICE([Not checking for BLAS and LAPACK because no Fortran compiler found])\nelse\n\t# Check for BLAS and LAPACK\n\t# First we need this since the libraries are Fortran called from C++\n\tAC_F77_LIBRARY_LDFLAGS\n\t# The following tests for both BLAS and LAPACK\n\tAX_LAPACK\nfi\n\n# Dependencies dictate the following order of libraries\nLIBS=\"$LAPACK_LIBS $BLAS_LIBS $LIBS\"\n# FLIBS should be included in LDADD or LIBADD in the relevant\n# Makefile.am\n\n# If the BLAS library is OpenBLAS then we need to give the user the\n# option to change the number of threads, since OpenBLAS's pthreads\n# can clash with Adept's use of OpenMP, leading to suboptimal\n# performance.\nac_have_openblas_cblas_header=no\n\nif test \"$ax_blas_ok\" = yes\nthen\n\tif test \"x$BLAS_LIBS\" = \"x-lopenblas\"\n\tthen\t\n\t\tAC_MSG_CHECKING([whether cblas.h is from OpenBLAS])\n\t\tAC_TRY_LINK([#include <cblas.h>],\n\t\t[openblas_set_num_threads(1)],\n\t\t[ac_have_openblas_cblas_header=yes\n\t\tAC_MSG_RESULT(yes)\n\t\tAC_DEFINE([HAVE_OPENBLAS_CBLAS_HEADER],1,[Is the clbas.h header file from OpenBLAS?])],\n\t\tAC_MSG_RESULT(no))\n\tfi\nfi\n\n### LIBRARIES THAT MAY BE USED BY TEST PROGRAMS ###\n\n# Checks for GNU Scientific 
Library\nAC_CHECK_LIB([gsl],[gsl_multimin_fdfminimizer_alloc],[AC_MSG_NOTICE([Note that GSL is not used by Adept, just by one of the test programs])])\nAC_SUBST(USE_GSL, [\"$ac_cv_lib_gsl_gsl_multimin_fdfminimizer_alloc\"])\n\n# Check for ADOL-C automatic differentiation library\nAC_CHECK_HEADERS([adolc/adolc.h])\nAC_CHECK_LIB([adolc],[tapestats])\n\n# Check for SACADO automatic differentiation library\nac_have_sacado=no\nsave_LIBS=$LIBS\nLIBS=\"$LIBS -lsacado -lteuchos\"\nAC_MSG_CHECKING([whether Sacado is installed])\nAC_TRY_LINK([#include <Sacado.hpp>],\n[Sacado::ELRFad::DFad<double> v = 1.0],\n[ac_have_sacado=yes\nAC_MSG_RESULT(yes)\nAC_DEFINE([HAVE_SACADO],1,[Is the Sacado library working?])],\n[LIBS=$save_LIBS\nAC_MSG_RESULT(no)])\n\n# Check for CppAD automatic differentiation library\nAC_CHECK_HEADERS([cppad/cppad.hpp])\nif test \"$ac_cv_header_cppad_cppad_hpp\" = yes\nthen\n   AC_DEFINE([NDEBUG],1,[If CppAD is being used by the benchmarking program then it is much faster with debugging disabled])\nfi\n\n\n### CREATE MAKEFILES AND CONFIG HEADER ###\n\nAC_CONFIG_FILES([Makefile makefile_include adept/Makefile include/Makefile benchmark/Makefile])\n\nAC_DEFINE_UNQUOTED([CXX],[\"$CXX\"],[C++ compiler])\nAC_DEFINE_UNQUOTED([CXXFLAGS],[\"$CXXFLAGS\"],[Flags passed to C++ compiler])\nAC_DEFINE_UNQUOTED([BLAS_LIBS],[\"$BLAS_LIBS\"],[BLAS library option])\n\nAH_BOTTOM([/* Use ADOLC only if both the library and the header files are available */\n#if defined( HAVE_LIBADOLC ) && defined( HAVE_ADOLC_ADOLC_H )\n#define HAVE_ADOLC 1\n#endif])\nAH_BOTTOM([/* Use CPPAD if the header files are available */\n#if defined( HAVE_CPPAD_CPPAD_HPP )\n#define HAVE_CPPAD 1\n#endif])\n\nAC_OUTPUT\n\n\n### REPORT CONFIGURATION TO THE USER ###\n\nAC_MSG_NOTICE([********************* Summary **************************************])\nAC_MSG_NOTICE([  CXX = $CXX ])\nAC_MSG_NOTICE([  CPPFLAGS = $CPPFLAGS])\nAC_MSG_NOTICE([  CXXFLAGS = $CXXFLAGS $OPENMP_CXXFLAGS])\nAC_MSG_NOTICE([  LDFLAGS = 
 $LDFLAGS])\nAC_MSG_NOTICE([  LIBS = $LIBS])\nAC_MSG_NOTICE([Typing \"make; make install\" will install Adept header files in $includedir])\nAC_MSG_NOTICE([and the static and shared libraries as $libdir/libadept.*, where])\nAC_MSG_NOTICE([prefix=$prefix])\nAC_MSG_NOTICE([********************* Libraries used by Adept **********************])\nac_warn_given=no\nif test \"$ax_blas_ok\" = yes\nthen\n\tAC_MSG_NOTICE([BLAS (Basic Linear Algebra Subprograms) will be used: BLAS_LIBS = $BLAS_LIBS])\n\tif test \"$ac_have_openblas_cblas_header\" = yes\n\tthen\n\t   AC_MSG_NOTICE([  Number of BLAS threads may be controlled at run time])\n\tfi\nelse\n\tAC_MSG_NOTICE([BLAS (Basic Linear Algebra Subprograms) will not be used: MATRIX MULTIPLICATION IS UNAVAILABLE])\n\tac_warn_given=yes\nfi\nif test \"$ax_lapack_ok\" = yes\nthen\n\tAC_MSG_NOTICE([LAPACK (Linear Algebra Package) will be used: LAPACK_LIBS = $LAPACK_LIBS])\nelse\n\tAC_MSG_NOTICE([LAPACK (Linear Algebra Package) will not be used: LINEAR ALGEBRA ROUTINES ARE UNAVAILABLE])\n\tac_warn_given=yes\nfi\n\nAC_MSG_NOTICE([********************* Libraries used by test programs **************])\n\nif test \"$ac_cv_lib_gsl_gsl_multimin_fdfminimizer_alloc\" = no\nthen\n\tAC_MSG_NOTICE([GNU Scientific Library (GSL) not found; Adept will compile all the])\n\tAC_MSG_NOTICE([example programs except test/test_gsl_interface.])\n\tac_warn_given=yes\nelse\n\tAC_MSG_NOTICE([GNU Scientific Library (GSL) found; Adept will compile all the])\n\tAC_MSG_NOTICE([example programs.])\nfi\n\nAC_MSG_NOTICE([********************* Benchmark program ****************************])\nAC_MSG_NOTICE([The benchmarking program, \"benchmark/advection_benchmark\", will be])\nAC_MSG_NOTICE([compiled with support for these automatic differentiation libraries:])\nAC_MSG_NOTICE([   Adept: yes])\n\nif test \"$ac_cv_lib_adolc_tapestats\" = yes -a \"$ac_cv_header_adolc_adolc_h\" = yes\nthen\n   \tAC_MSG_NOTICE([   ADOLC: yes])\nelse\n\tAC_MSG_NOTICE([   ADOLC: 
no])\n\tac_warn_given=yes\nfi\n\nif test \"$ac_cv_header_cppad_cppad_hpp\" = yes\nthen\n   \tAC_MSG_NOTICE([   CppAD: yes])\nelse\n\tAC_MSG_NOTICE([   CppAD: no])\n\tac_warn_given=yes\nfi\n\nif test \"$ac_have_sacado\" = no\nthen\n\tAC_MSG_NOTICE([   Sacado: no])\n\tac_warn_given=yes\nelse\n\tAC_MSG_NOTICE([   Sacado: yes])\nfi\n\nAC_MSG_NOTICE([********************* Top tips *************************************])\nAC_MSG_NOTICE([To use a higher than default optimization level, call this configure])\nAC_MSG_NOTICE([script with something like: ./configure \"CXXFLAGS=-g -O3\"])\nAC_MSG_NOTICE([If you have libraries in non-standard locations, specify their location])\nAC_MSG_NOTICE([by calling this script with something like:])\nAC_MSG_NOTICE([  ./configure CPPFLAGS=-I/local/include LDFLAGS=\"-L/local/lib -Wl,-rpath,/local/lib\"])\nAC_MSG_NOTICE([The rpath argument is especially useful for locating the BLAS and LAPACK])\nAC_MSG_NOTICE([libraries if they are in non-standard locations, so that executables])\nAC_MSG_NOTICE([built with Adept do not need to use the LD_LIBRARY_PATH environment])\nAC_MSG_NOTICE([variable to specify their locations at run-time.])\nAC_MSG_NOTICE([********************************************************************])\n\n"
  },
  {
    "path": "doc/COPYING",
    "content": "\n                GNU Free Documentation License\n                 Version 1.3, 3 November 2008\n\n\n Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.\n     <http://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n0. PREAMBLE\n\nThe purpose of this License is to make a manual, textbook, or other\nfunctional and useful document \"free\" in the sense of freedom: to\nassure everyone the effective freedom to copy and redistribute it,\nwith or without modifying it, either commercially or noncommercially.\nSecondarily, this License preserves for the author and publisher a way\nto get credit for their work, while not being considered responsible\nfor modifications made by others.\n\nThis License is a kind of \"copyleft\", which means that derivative\nworks of the document must themselves be free in the same sense.  It\ncomplements the GNU General Public License, which is a copyleft\nlicense designed for free software.\n\nWe have designed this License in order to use it for manuals for free\nsoftware, because free software needs free documentation: a free\nprogram should come with manuals providing the same freedoms that the\nsoftware does.  But this License is not limited to software manuals;\nit can be used for any textual work, regardless of subject matter or\nwhether it is published as a printed book.  We recommend this License\nprincipally for works whose purpose is instruction or reference.\n\n\n1. APPLICABILITY AND DEFINITIONS\n\nThis License applies to any manual or other work, in any medium, that\ncontains a notice placed by the copyright holder saying it can be\ndistributed under the terms of this License.  Such a notice grants a\nworld-wide, royalty-free license, unlimited in duration, to use that\nwork under the conditions stated herein.  The \"Document\", below,\nrefers to any such manual or work.  
Any member of the public is a\nlicensee, and is addressed as \"you\".  You accept the license if you\ncopy, modify or distribute the work in a way requiring permission\nunder copyright law.\n\nA \"Modified Version\" of the Document means any work containing the\nDocument or a portion of it, either copied verbatim, or with\nmodifications and/or translated into another language.\n\nA \"Secondary Section\" is a named appendix or a front-matter section of\nthe Document that deals exclusively with the relationship of the\npublishers or authors of the Document to the Document's overall\nsubject (or to related matters) and contains nothing that could fall\ndirectly within that overall subject.  (Thus, if the Document is in\npart a textbook of mathematics, a Secondary Section may not explain\nany mathematics.)  The relationship could be a matter of historical\nconnection with the subject or with related matters, or of legal,\ncommercial, philosophical, ethical or political position regarding\nthem.\n\nThe \"Invariant Sections\" are certain Secondary Sections whose titles\nare designated, as being those of Invariant Sections, in the notice\nthat says that the Document is released under this License.  If a\nsection does not fit the above definition of Secondary then it is not\nallowed to be designated as Invariant.  The Document may contain zero\nInvariant Sections.  If the Document does not identify any Invariant\nSections then there are none.\n\nThe \"Cover Texts\" are certain short passages of text that are listed,\nas Front-Cover Texts or Back-Cover Texts, in the notice that says that\nthe Document is released under this License.  
A Front-Cover Text may\nbe at most 5 words, and a Back-Cover Text may be at most 25 words.\n\nA \"Transparent\" copy of the Document means a machine-readable copy,\nrepresented in a format whose specification is available to the\ngeneral public, that is suitable for revising the document\nstraightforwardly with generic text editors or (for images composed of\npixels) generic paint programs or (for drawings) some widely available\ndrawing editor, and that is suitable for input to text formatters or\nfor automatic translation to a variety of formats suitable for input\nto text formatters.  A copy made in an otherwise Transparent file\nformat whose markup, or absence of markup, has been arranged to thwart\nor discourage subsequent modification by readers is not Transparent.\nAn image format is not Transparent if used for any substantial amount\nof text.  A copy that is not \"Transparent\" is called \"Opaque\".\n\nExamples of suitable formats for Transparent copies include plain\nASCII without markup, Texinfo input format, LaTeX input format, SGML\nor XML using a publicly available DTD, and standard-conforming simple\nHTML, PostScript or PDF designed for human modification.  Examples of\ntransparent image formats include PNG, XCF and JPG.  Opaque formats\ninclude proprietary formats that can be read and edited only by\nproprietary word processors, SGML or XML for which the DTD and/or\nprocessing tools are not generally available, and the\nmachine-generated HTML, PostScript or PDF produced by some word\nprocessors for output purposes only.\n\nThe \"Title Page\" means, for a printed book, the title page itself,\nplus such following pages as are needed to hold, legibly, the material\nthis License requires to appear in the title page.  
For works in\nformats which do not have any title page as such, \"Title Page\" means\nthe text near the most prominent appearance of the work's title,\npreceding the beginning of the body of the text.\n\nThe \"publisher\" means any person or entity that distributes copies of\nthe Document to the public.\n\nA section \"Entitled XYZ\" means a named subunit of the Document whose\ntitle either is precisely XYZ or contains XYZ in parentheses following\ntext that translates XYZ in another language.  (Here XYZ stands for a\nspecific section name mentioned below, such as \"Acknowledgements\",\n\"Dedications\", \"Endorsements\", or \"History\".)  To \"Preserve the Title\"\nof such a section when you modify the Document means that it remains a\nsection \"Entitled XYZ\" according to this definition.\n\nThe Document may include Warranty Disclaimers next to the notice which\nstates that this License applies to the Document.  These Warranty\nDisclaimers are considered to be included by reference in this\nLicense, but only as regards disclaiming warranties: any other\nimplication that these Warranty Disclaimers may have is void and has\nno effect on the meaning of this License.\n\n2. VERBATIM COPYING\n\nYou may copy and distribute the Document in any medium, either\ncommercially or noncommercially, provided that this License, the\ncopyright notices, and the license notice saying this License applies\nto the Document are reproduced in all copies, and that you add no\nother conditions whatsoever to those of this License.  You may not use\ntechnical measures to obstruct or control the reading or further\ncopying of the copies you make or distribute.  However, you may accept\ncompensation in exchange for copies.  If you distribute a large enough\nnumber of copies you must also follow the conditions in section 3.\n\nYou may also lend copies, under the same conditions stated above, and\nyou may publicly display copies.\n\n\n3. 
COPYING IN QUANTITY\n\nIf you publish printed copies (or copies in media that commonly have\nprinted covers) of the Document, numbering more than 100, and the\nDocument's license notice requires Cover Texts, you must enclose the\ncopies in covers that carry, clearly and legibly, all these Cover\nTexts: Front-Cover Texts on the front cover, and Back-Cover Texts on\nthe back cover.  Both covers must also clearly and legibly identify\nyou as the publisher of these copies.  The front cover must present\nthe full title with all words of the title equally prominent and\nvisible.  You may add other material on the covers in addition.\nCopying with changes limited to the covers, as long as they preserve\nthe title of the Document and satisfy these conditions, can be treated\nas verbatim copying in other respects.\n\nIf the required texts for either cover are too voluminous to fit\nlegibly, you should put the first ones listed (as many as fit\nreasonably) on the actual cover, and continue the rest onto adjacent\npages.\n\nIf you publish or distribute Opaque copies of the Document numbering\nmore than 100, you must either include a machine-readable Transparent\ncopy along with each Opaque copy, or state in or with each Opaque copy\na computer-network location from which the general network-using\npublic has access to download using public-standard network protocols\na complete Transparent copy of the Document, free of added material.\nIf you use the latter option, you must take reasonably prudent steps,\nwhen you begin distribution of Opaque copies in quantity, to ensure\nthat this Transparent copy will remain thus accessible at the stated\nlocation until at least one year after the last time you distribute an\nOpaque copy (directly or through your agents or retailers) of that\nedition to the public.\n\nIt is requested, but not required, that you contact the authors of the\nDocument well before redistributing any large number of copies, to\ngive them a chance to provide you 
with an updated version of the\nDocument.\n\n\n4. MODIFICATIONS\n\nYou may copy and distribute a Modified Version of the Document under\nthe conditions of sections 2 and 3 above, provided that you release\nthe Modified Version under precisely this License, with the Modified\nVersion filling the role of the Document, thus licensing distribution\nand modification of the Modified Version to whoever possesses a copy\nof it.  In addition, you must do these things in the Modified Version:\n\nA. Use in the Title Page (and on the covers, if any) a title distinct\n   from that of the Document, and from those of previous versions\n   (which should, if there were any, be listed in the History section\n   of the Document).  You may use the same title as a previous version\n   if the original publisher of that version gives permission.\nB. List on the Title Page, as authors, one or more persons or entities\n   responsible for authorship of the modifications in the Modified\n   Version, together with at least five of the principal authors of the\n   Document (all of its principal authors, if it has fewer than five),\n   unless they release you from this requirement.\nC. State on the Title page the name of the publisher of the\n   Modified Version, as the publisher.\nD. Preserve all the copyright notices of the Document.\nE. Add an appropriate copyright notice for your modifications\n   adjacent to the other copyright notices.\nF. Include, immediately after the copyright notices, a license notice\n   giving the public permission to use the Modified Version under the\n   terms of this License, in the form shown in the Addendum below.\nG. Preserve in that license notice the full lists of Invariant Sections\n   and required Cover Texts given in the Document's license notice.\nH. Include an unaltered copy of this License.\nI. 
Preserve the section Entitled \"History\", Preserve its Title, and add\n   to it an item stating at least the title, year, new authors, and\n   publisher of the Modified Version as given on the Title Page.  If\n   there is no section Entitled \"History\" in the Document, create one\n   stating the title, year, authors, and publisher of the Document as\n   given on its Title Page, then add an item describing the Modified\n   Version as stated in the previous sentence.\nJ. Preserve the network location, if any, given in the Document for\n   public access to a Transparent copy of the Document, and likewise\n   the network locations given in the Document for previous versions\n   it was based on.  These may be placed in the \"History\" section.\n   You may omit a network location for a work that was published at\n   least four years before the Document itself, or if the original\n   publisher of the version it refers to gives permission.\nK. For any section Entitled \"Acknowledgements\" or \"Dedications\",\n   Preserve the Title of the section, and preserve in the section all\n   the substance and tone of each of the contributor acknowledgements\n   and/or dedications given therein.\nL. Preserve all the Invariant Sections of the Document,\n   unaltered in their text and in their titles.  Section numbers\n   or the equivalent are not considered part of the section titles.\nM. Delete any section Entitled \"Endorsements\".  Such a section\n   may not be included in the Modified Version.\nN. Do not retitle any existing section to be Entitled \"Endorsements\"\n   or to conflict in title with any Invariant Section.\nO. Preserve any Warranty Disclaimers.\n\nIf the Modified Version includes new front-matter sections or\nappendices that qualify as Secondary Sections and contain no material\ncopied from the Document, you may at your option designate some or all\nof these sections as invariant.  
To do this, add their titles to the\nlist of Invariant Sections in the Modified Version's license notice.\nThese titles must be distinct from any other section titles.\n\nYou may add a section Entitled \"Endorsements\", provided it contains\nnothing but endorsements of your Modified Version by various\nparties--for example, statements of peer review or that the text has\nbeen approved by an organization as the authoritative definition of a\nstandard.\n\nYou may add a passage of up to five words as a Front-Cover Text, and a\npassage of up to 25 words as a Back-Cover Text, to the end of the list\nof Cover Texts in the Modified Version.  Only one passage of\nFront-Cover Text and one of Back-Cover Text may be added by (or\nthrough arrangements made by) any one entity.  If the Document already\nincludes a cover text for the same cover, previously added by you or\nby arrangement made by the same entity you are acting on behalf of,\nyou may not add another; but you may replace the old one, on explicit\npermission from the previous publisher that added the old one.\n\nThe author(s) and publisher(s) of the Document do not by this License\ngive permission to use their names for publicity for or to assert or\nimply endorsement of any Modified Version.\n\n\n5. COMBINING DOCUMENTS\n\nYou may combine the Document with other documents released under this\nLicense, under the terms defined in section 4 above for modified\nversions, provided that you include in the combination all of the\nInvariant Sections of all of the original documents, unmodified, and\nlist them all as Invariant Sections of your combined work in its\nlicense notice, and that you preserve all their Warranty Disclaimers.\n\nThe combined work need only contain one copy of this License, and\nmultiple identical Invariant Sections may be replaced with a single\ncopy.  
If there are multiple Invariant Sections with the same name but\ndifferent contents, make the title of each such section unique by\nadding at the end of it, in parentheses, the name of the original\nauthor or publisher of that section if known, or else a unique number.\nMake the same adjustment to the section titles in the list of\nInvariant Sections in the license notice of the combined work.\n\nIn the combination, you must combine any sections Entitled \"History\"\nin the various original documents, forming one section Entitled\n\"History\"; likewise combine any sections Entitled \"Acknowledgements\",\nand any sections Entitled \"Dedications\".  You must delete all sections\nEntitled \"Endorsements\".\n\n\n6. COLLECTIONS OF DOCUMENTS\n\nYou may make a collection consisting of the Document and other\ndocuments released under this License, and replace the individual\ncopies of this License in the various documents with a single copy\nthat is included in the collection, provided that you follow the rules\nof this License for verbatim copying of each of the documents in all\nother respects.\n\nYou may extract a single document from such a collection, and\ndistribute it individually under this License, provided you insert a\ncopy of this License into the extracted document, and follow this\nLicense in all other respects regarding verbatim copying of that\ndocument.\n\n\n7. 
AGGREGATION WITH INDEPENDENT WORKS\n\nA compilation of the Document or its derivatives with other separate\nand independent documents or works, in or on a volume of a storage or\ndistribution medium, is called an \"aggregate\" if the copyright\nresulting from the compilation is not used to limit the legal rights\nof the compilation's users beyond what the individual works permit.\nWhen the Document is included in an aggregate, this License does not\napply to the other works in the aggregate which are not themselves\nderivative works of the Document.\n\nIf the Cover Text requirement of section 3 is applicable to these\ncopies of the Document, then if the Document is less than one half of\nthe entire aggregate, the Document's Cover Texts may be placed on\ncovers that bracket the Document within the aggregate, or the\nelectronic equivalent of covers if the Document is in electronic form.\nOtherwise they must appear on printed covers that bracket the whole\naggregate.\n\n\n8. TRANSLATION\n\nTranslation is considered a kind of modification, so you may\ndistribute translations of the Document under the terms of section 4.\nReplacing Invariant Sections with translations requires special\npermission from their copyright holders, but you may include\ntranslations of some or all Invariant Sections in addition to the\noriginal versions of these Invariant Sections.  You may include a\ntranslation of this License, and all the license notices in the\nDocument, and any Warranty Disclaimers, provided that you also include\nthe original English version of this License and the original versions\nof those notices and disclaimers.  
In case of a disagreement between\nthe translation and the original version of this License or a notice\nor disclaimer, the original version will prevail.\n\nIf a section in the Document is Entitled \"Acknowledgements\",\n\"Dedications\", or \"History\", the requirement (section 4) to Preserve\nits Title (section 1) will typically require changing the actual\ntitle.\n\n\n9. TERMINATION\n\nYou may not copy, modify, sublicense, or distribute the Document\nexcept as expressly provided under this License.  Any attempt\notherwise to copy, modify, sublicense, or distribute it is void, and\nwill automatically terminate your rights under this License.\n\nHowever, if you cease all violation of this License, then your license\nfrom a particular copyright holder is reinstated (a) provisionally,\nunless and until the copyright holder explicitly and finally\nterminates your license, and (b) permanently, if the copyright holder\nfails to notify you of the violation by some reasonable means prior to\n60 days after the cessation.\n\nMoreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\nTermination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  If your rights have been terminated and not permanently\nreinstated, receipt of a copy of some or all of the same material does\nnot give you any rights to use it.\n\n\n10. FUTURE REVISIONS OF THIS LICENSE\n\nThe Free Software Foundation may publish new, revised versions of the\nGNU Free Documentation License from time to time.  
Such new versions\nwill be similar in spirit to the present version, but may differ in\ndetail to address new problems or concerns.  See\nhttp://www.gnu.org/copyleft/.\n\nEach version of the License is given a distinguishing version number.\nIf the Document specifies that a particular numbered version of this\nLicense \"or any later version\" applies to it, you have the option of\nfollowing the terms and conditions either of that specified version or\nof any later version that has been published (not as a draft) by the\nFree Software Foundation.  If the Document does not specify a version\nnumber of this License, you may choose any version ever published (not\nas a draft) by the Free Software Foundation.  If the Document\nspecifies that a proxy can decide which future versions of this\nLicense can be used, that proxy's public statement of acceptance of a\nversion permanently authorizes you to choose that version for the\nDocument.\n\n11. RELICENSING\n\n\"Massive Multiauthor Collaboration Site\" (or \"MMC Site\") means any\nWorld Wide Web server that publishes copyrightable works and also\nprovides prominent facilities for anybody to edit those works.  A\npublic wiki that anybody can edit is an example of such a server.  
A\n\"Massive Multiauthor Collaboration\" (or \"MMC\") contained in the site\nmeans any set of copyrightable works thus published on the MMC site.\n\n\"CC-BY-SA\" means the Creative Commons Attribution-Share Alike 3.0 \nlicense published by Creative Commons Corporation, a not-for-profit \ncorporation with a principal place of business in San Francisco, \nCalifornia, as well as future copyleft versions of that license \npublished by that same organization.\n\n\"Incorporate\" means to publish or republish a Document, in whole or in \npart, as part of another Document.\n\nAn MMC is \"eligible for relicensing\" if it is licensed under this \nLicense, and if all works that were first published under this License \nsomewhere other than this MMC, and subsequently incorporated in whole or \nin part into the MMC, (1) had no cover texts or invariant sections, and \n(2) were thus incorporated prior to November 1, 2008.\n\nThe operator of an MMC Site may republish an MMC contained in the site\nunder CC-BY-SA on the same site at any time before August 1, 2009,\nprovided the MMC is eligible for relicensing.\n\n\nADDENDUM: How to use this License for your documents\n\nTo use this License in a document you have written, include a copy of\nthe License in the document and put the following copyright and\nlicense notices just after the title page:\n\n    Copyright (c)  YEAR  YOUR NAME.\n    Permission is granted to copy, distribute and/or modify this document\n    under the terms of the GNU Free Documentation License, Version 1.3\n    or any later version published by the Free Software Foundation;\n    with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.\n    A copy of the license is included in the section entitled \"GNU\n    Free Documentation License\".\n\nIf you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,\nreplace the \"with...Texts.\" line with this:\n\n    with the Invariant Sections being LIST THEIR TITLES, with the\n    Front-Cover 
Texts being LIST, and with the Back-Cover Texts being LIST.\n\nIf you have Invariant Sections without Cover Texts, or some other\ncombination of the three, merge those two alternatives to suit the\nsituation.\n\nIf your document contains nontrivial examples of program code, we\nrecommend releasing these examples in parallel under your choice of\nfree software license, such as the GNU General Public License,\nto permit their use in free software.\n"
  },
  {
    "path": "doc/Makefile",
    "content": "# If you have pdflatex installed, type \"make\" to create the\n# documentation, \"make clean\" to delete it\n\ndocumentation: adept_documentation.pdf adept_reference.pdf\n\nadept_documentation.pdf: adept_documentation.tex\n\tpdflatex adept_documentation.tex\n\tpdflatex adept_documentation.tex\n\tpdflatex adept_documentation.tex\n\nadept_reference.pdf: adept_reference.tex\n\tpdflatex adept_reference.tex\n\nclean:\n\trm -f adept_documentation.pdf adept_reference.pdf\n\n.PHONY: documentation clean\n"
  },
  {
    "path": "doc/README",
    "content": "This directory contains the LaTeX source files for the Adept User\nGuide and Adept Reference Sheet\n\nType \"make\" to create the corresponding PDF files (using pdflatex),\nand \"make clean\" to delete them\n\nPermission is granted to copy, distribute and/or modify the Adept User\nGuide and Adept Reference Sheet under the terms of the GNU Free\nDocumentation License, Version 1.3 or any later version published by\nthe Free Software Foundation. This license may be found at\nhttp://www.gnu.org/copyleft/fdl.html, and in this directory in the\n\"COPYING\" file. As an exception, no copyright is asserted for the code\nfragments in the document (indicated in the text with a light-grey\nbackground); these code fragments are in the Public Domain and may be\ncopied, modified and distributed without restriction.\n"
  },
  {
    "path": "doc/adept_documentation.tex",
    "content": "% \n% Adept automatic differentiation library for C++: User guide\n%\n% Type \"pdflatex adept_documentation.tex\" twice to recreate the PDF\n% file (or type \"make pdf\" in this directory after running the\n% configure script one directory above).\n%\n% Permission is granted to copy, distribute and/or modify this\n% document under the terms of the GNU Free Documentation License,\n% Version 1.3 or any later version published by the Free Software\n% Foundation. This license may be found at\n% http://www.gnu.org/copyleft/fdl.html, and in this directory in the\n% \"COPYING\" file. As an exception, no copyright is asserted for the\n% code fragments in this document (indicated in the text with a\n% light-grey background); these code fragments are in the Public\n% Domain and may be copied, modified and distributed without\n% restriction.\n\n\\documentclass[a4,oneside]{book}\n\\usepackage[colorlinks=true,linkcolor=blue,citecolor=blue]{hyperref}\n\\usepackage{natbib}\n\\usepackage{times}\n\\usepackage{listings}\n\\usepackage{xcolor}\n\\usepackage{color}\n\\usepackage{marginnote}\n\\usepackage{rotating}\n\n\\usepackage{mdframed,lipsum}\n\\newmdenv[\n  leftmargin = 0pt,\n  innerleftmargin = 1em,\n  innertopmargin = 0pt,\n  innerbottommargin = 0pt,\n  innerrightmargin = 0pt,\n  rightmargin = 0pt,\n  linewidth = 1pt,\n  topline = false,\n  rightline = false,\n  bottomline = false\n  ]{leftbar}\n\n% Set math in Times Roman\n\\DeclareSymbolFont{letters}{OML}{ptmcm}{m}{it}\n\\DeclareSymbolFont{operators}{OT1}{ptmcm}{m}{n}\n\n% Page set up\n\\setlength{\\oddsidemargin}{0cm} %{0.5cm}\n\\setlength{\\evensidemargin}{0cm} %{0.5cm}\n\\setlength{\\topmargin}{-2cm}\n\\setlength{\\textheight}{24cm}\n\\setlength{\\textwidth}{16cm}\n\\setlength{\\marginparsep}{0.5cm}\n\\setlength{\\marginparwidth}{0cm}\n\\setlength{\\parindent}{1em}\n\\setlength{\\parskip}{0cm}\n\\renewcommand{\\baselinestretch}{1.1}\n\\sloppy\n\n% Configure appearance of code 
listings\n\\definecolor{light-gray}{gray}{0.92}\n\\def\\codesize{\\small}\n\\lstset{language=C++,\n  backgroundcolor=\\color{light-gray},\n  numbersep=5pt,\n  xleftmargin=0cm,\n  xrightmargin=0cm,\n  basicstyle=\\footnotesize\\ttfamily,\n  emph={adouble,xdouble,Stack,adept,Array,FixedArray,Vector,aVector,aReal,Optimizable,Real,Minimizer,MinimizerStatus,Matrix,aMatrix,Array3D,aArray3D,intVector,boolVector,floatVector,floatMatrix,intMatrix,FortranArray,SpecialMatrix,SquareMatrix,aSquareMatrix,SymmMatrix,aSymmMatrix,UpperMatrix,LowerMatrix,IndexVector,adept_arrays,adept_optimize,adept_fortran},\n  emphstyle=\\bfseries\\color{red}}\n\\lstset{showstringspaces=false}\n\n% Table-of-contents configuration\n\\usepackage{tocloft}\n\\setlength\\cftparskip{-2pt}\n\\setlength\\cftbeforesecskip{1pt}\n\\setlength\\cftaftertoctitleskip{2pt}\n\\renewcommand\\cftsecfont{\\normalfont}\n\\renewcommand\\cftsecpagefont{\\normalfont}\n\\renewcommand{\\cftsecleader}{\\cftdotfill{\\cftsecdotsep}}\n\\renewcommand\\cftsecdotsep{\\cftdot}\n\\renewcommand\\cftsubsecdotsep{\\cftdot}\n\n% Page headers\n\\usepackage{fancyhdr}\n\\pagestyle{fancy}\n\\renewcommand{\\headrulewidth}{0.5pt}\n\\renewcommand{\\sectionmark}[1]{\\markright{\\thesection.\\ #1}}\n\\renewcommand{\\subsectionmark}[1]{}\n\\fancyhead[RO,RE]{\\thepage}\n\\fancyfoot[C]{}\n\n% Symbols and macros\n\\def\\x{\\ensuremath{{\\bf x}}}\n\\def\\y{\\ensuremath{{\\bf y}}}\n\\def\\H{\\ensuremath{{\\bf H}}}\n\\def\\T{\\ensuremath{^\\mathrm{T}}}\n\\def\\Adept{\\emph{Adept}}\n\\def\\code#1{{\\codesize\\texttt{#1}}}\n\\def\\codebf#1{{\\codesize\\texttt{\\textbf{#1}}}}\n\\def\\citem#1{\\item[{\\codesize\\texttt{#1}}]}\n\\def\\codestyle#1{\\texttt{#1}}\n\\def\\Offset{size\\_t}\n\\renewcommand\\thefootnote{\\relax}\n\\def\\cxx11{\\marginpar{\\rotatebox[origin=rb]{90}{\\textbf{C++11 only~~~}}}}\n\\reversemarginpar\n\n% Title material\n\\title{\\Adept\\ C++ Software Library: User Guide}\n\n\\author{Robin J. 
Hogan\\\\ \\emph{European Centre for Medium Range\n    Weather Forecasts, Reading, UK}\\\\ \\emph{and School of\n    Mathematical, Physical and Computational Sciences, University of\n    Reading, UK,}}\n\n\\date{Document version 2.1.3 (February 2024) applicable to \\Adept\\ version\n  2.1.3 \\thanks{This document is copyright \\copyright\\ Robin J. Hogan\n    2013--2024.  Permission is granted to copy, distribute and/or\n    modify this document under the terms of the GNU Free Documentation\n    License, Version 1.3 or any later version published by the Free\n    Software Foundation. This license may be found at\n    \\url{http://www.gnu.org/copyleft/fdl.html}.  As an exception, no\n    copyright is asserted for the code fragments in this document\n    (indicated in the text with a light-grey background); these code\n    fragments are hereby placed in the Public Domain, and accordingly\n    may be copied, modified and distributed without restriction.}\n  \\thanks{If you have any queries about \\Adept\\ that are not answered\n    by this document or by the information on the \\Adept\\ web site\n    (\\url{http://www.met.reading.ac.uk/clouds/adept/}) then please\n    email me at\n    \\href{mailto:r.j.hogan@ecmwf.int}{\\texttt{r.j.hogan@ecmwf.int}}.}}\n\\begin{document}\n\\maketitle\n\n\\tableofcontents\n\\def\\thefootnote{\\fnsymbol{footnote}}\n\\chapter{Introduction}\n\\section{What is Adept?}\n\\Adept\\ (Automatic Differentiation using Expression Templates) is a\nC++ software library that enables algorithms to be automatically\ndifferentiated. Since version 2.0\\footnote{Note that the version 1.9.x\n  series served as beta releases for version 2.0 of \\Adept.} it also\nprovides array classes that can be used in array expressions.  
These\ntwo capabilities are fully integrated such that array expressions can\nbe differentiated efficiently, but the array capability may also be\nused on its own.\n\nThe automatic-differentiation capability uses an operator overloading\napproach, so very little code modification is\nrequired. Differentiation can be performed in forward mode (the\n``tangent-linear'' computation), reverse mode (the ``adjoint''\ncomputation), or the full Jacobian matrix can be computed. This\nbehaviour is common to several other libraries, namely ADOL-C\n\\citep{Griewank+1996}, CppAD \\citep{Bell2007} and Sacado\n\\citep{Gay2005}, but the use of expression templates, an efficient way\nto store the differential information and several other optimizations\nmean that reverse-mode differentiation tends to be significantly\nfaster and use less memory. In fact, \\Adept\\ is also usually only a\nlittle slower than an adjoint code you might write by hand, but\nimmeasurably faster in terms of user time; adjoint coding is very time\nconsuming and error-prone. For technical details of how it works,\nbenchmark results and further discussion of the factors affecting its\nspeed when applied to a particular code, see \\cite{Hogan2014}.\n\nExpression templates also underpin a number of libraries that provide\nthe capability to perform mathematical operations on entire arrays\n\\citep{Veldhuizen1995}. Unfortunately, if \\Adept\\ version 1.x and such\nan array library are used together, then the speed advantages of\nexpression templates are lost, if indeed the libraries work together\nat all. Since version 2.0, \\Adept\\ provides array classes that\novercome this problem: its automatic differentiation and array\ncapabilities are underpinned by a single unified expression template\nframework so that array expressions may be differentiated very\nefficiently.  
However, it should be stressed that \\Adept\\ is useful as\na fully functional array library even if you don't wish to use its\nautomatic differentiation capability. \\Adept\\ uses BLAS and LAPACK for\nmatrix operations.\n\nThis user guide describes how to apply the \\Adept\\ software library to\nyour code, and many of the examples map on to those in the \\code{test}\ndirectory of the \\Adept\\ software package.  Section\n\\ref{sec:installing} outlines how to install \\Adept\\ on your system\nand how to compile your own code to use it. Chapter \\ref{chap:ad}\ndescribes how to use the automatic differentiation capability of the\nlibrary, chapter \\ref{chap:arrays} its array capability and chapter\n\\ref{chap:optimize} its optimization capability. Chapter\n\\ref{chap:gen} then describes general aspects such as exception\nhandling, configuration options and license terms.\n\n\\section{Installing \\Adept\\ and compiling your code to use it}\n\\label{sec:installing}\n\\Adept\\ should work with any C++98 compliant compiler, but uses some\nC++11 features if compiled with support for this later standard. Most\nof the testing has been on Linux with the GNU C++ compiler, but it\nalso compiles on Linux with the Clang and Intel compilers and on\nWindows with the Microsoft compiler. The code is built with the help\nof a \\code{configure} shell script generated by GNU autotools.  If you\nare on a non-Unix system (e.g.\\ Windows) and cannot use shell scripts,\nsee section \\ref{sec:non-unix}.\n\\subsection{Unix-like platforms}\n\\label{sec:unix}\nOn a Unix-like system, do the following:\n\\begin{enumerate}\n\\item Install the BLAS library to enable matrix multiplication.  For\n  the best performance in matrix operations it is recommended that you\n  install an optimized package such as OpenBLAS\\footnote{OpenBLAS is\n    available from \\url{http://www.openblas.net/}.} or\n  ATLAS\\footnote{ATLAS is available from\n    \\url{http://math-atlas.sourceforge.net/}.}.  
If you have multiple\n  BLAS libraries available on your system you can specify the one you\n  want by calling the \\code{configure} script below with\n  \\code{--with-blas=openblas} or similar.  If \\Adept\\ is compiled\n  without BLAS support then matrix multiplication will fail at run\n  time.\n\\item Optionally install the LAPACK library, necessary for matrix\n  inversion and solving linear systems of equations. If you do not\n  install this then \\Adept\\ will still compile but the functions\n  \\code{inv} and \\code{solve} will fail at run time. Note that LAPACK\n  relies on the underlying BLAS library for its speed.\n\\item The test and benchmarking programs can make use of additional\n  libraries if available. If you also install any of the automatic\n  differentiation tools ADOL-C, CppAD and/or Sacado then the\n  benchmarking test program can compare them to \\Adept. One of the\n  test programs uses the minimization algorithm from the GNU\n  Scientific Library, if available, so you may wish to install that\n  too.\n\\item Unpack the package (\\code{tar xvfz adept-2.x.tar.gz} on Linux)\n  and \\code{cd} to the directory \\code{adept-2.x}.\n\\item Configure the build using the \\code{configure} script. The most\n  basic method is to just run\n\\begin{lstlisting}\n ./configure\n\\end{lstlisting}\nMore likely you will wish to compile with a higher level of\noptimization than the default (which is \\code{-O2}), achieved by\nsetting the environment variable \\code{CXXFLAGS}. You may also wish to\nspecify the root directory of the installation, say to\n\\code{/foo}. These may be done by running instead\n\\begin{lstlisting}\n ./configure CXXFLAGS=\"-g -O3\" --prefix=/foo\n\\end{lstlisting}\nThe \\code{-g} option to \\code{CXXFLAGS} ensures debugging information\nis stored. If you use the GNU compiler then consider the \\code{-g1}\noption instead to reduce the amount of debugging information\nstored. 
The GNU \\code{-march=native} option will also enable the\nfastest instruction set for the machine on which the code is being\ncompiled.  \\Adept\\ can vectorize certain floating-point array\nexpressions making use of the SSE2, AVX and AVX512 instruction sets on\nIntel hardware and the NEON instruction set on 64-bit ARM. If a library you\nwish to use is installed in a non-system directory, say under\n\\code{/foo}, then specify the locations as follows:\n\\begin{lstlisting}\n ./configure CPPFLAGS=\"-I/foo/include\" LDFLAGS=\"-L/foo/lib -Wl,-rpath,/foo/lib\"\n\\end{lstlisting}\nwhere the \\code{-rpath} business is needed in order that the\n\\Adept\\ shared library knows where to look for the libraries it is\ndependent on.  If you have them then for the benchmarking program you\ncan also add the non-system location of ADOL-C, CppAD and Sacado\nlibraries with additional \\code{-I} and \\code{-L} arguments, but note\nthat the \\code{-rpath} argument is not needed in that case.  You can\nsee the more general options available by running \\code{./configure\n  --help}; for example, you can turn-off OpenMP parallelization in the\ncomputation of Jacobian matrices using \\code{--disable-openmp}.  See\nalso section \\ref{sec:configuring} for ways to make more fundamental\nchanges to the configuration of \\Adept.  
The output from the\n\\code{configure} script provides information on aspects of how\n\\Adept\\ and the test programs will be built.\n\\item Build \\Adept\\ by running\n\\begin{lstlisting}\n make\n\\end{lstlisting}\nThis will create the static and shared libraries in \\code{adept/.libs}.\n\\item Install the header files and the static and shared libraries by\n  running\n\\begin{lstlisting}\n make install\n\\end{lstlisting}\nIf this is to be installed to a system directory, you will need to log\nin as the super-user first, or run \\code{sudo make install}\ndepending on your system.\n\\item Build and run the test programs by running\n\\begin{lstlisting}\n make check\n\\end{lstlisting}\nNote that this may be done without first installing the\n\\Adept\\ library to a system directory.  This compiles a number of test\nprograms in the \\code{test} directory and runs them one by one; if any\nfail due to an incorrect result then \\code{make check} will fail.\n%\nThe \\code{make check} operation also compiles\n\\code{autodiff\\_benchmark} in the \\code{benchmark} directory for\ncomparing the speed of the differentiation of two advection algorithms\nusing \\Adept, ADOL-C, CppAD and Sacado (or whichever subset of these\ntools you have on your system).  It also compiles \\code{animate} for\nvisualizing at a terminal what the algorithms are doing.  
Further\ninformation on running these programs can be found in the\n\\code{README} files in the relevant directories.\n\\end{enumerate}\n%\nThe test programs in the \\code{test} directory are as follows:\n%\n\\begin{enumerate}\n\\item\\code{test\\_adept}: compares the results of numerical and\n  automatic differentiation.\n\\item\\code{test\\_with\\_without\\_ad}: does the same but compiling the\n  same source code both with and without automatic differentiation\n  (see \\code{test/Makefile} for how this is done),\n\\item\\code{test\\_radiances}: demonstrates the interfacing of\n  \\Adept\\ with code that provides its own Jacobian.\n\\item\\code{test\\_gsl\\_interface}: implementation of a simple minimization\n  problem using the L-BFGS minimizer in the GSL library.\n\\item\\code{test\\_misc}: the trivial example from \\cite{Hogan2014}.\n\\item\\code{test\\_checkpoint}: demonstration of checkpointing, a useful\n  technique for large codes.\n\\item\\code{test\\_thread\\_safe}: demonstration of the use of multiple\n  OpenMP threads, each with its own instance of an \\Adept\\ stack.\n\\item\\code{test\\_no\\_lib}: demonstrates the use of the\n  \\code{adept\\_source.h} header file that means there is no need to\n  link to the \\Adept\\ library in order to create an executable.\n\\item\\code{test\\_arrays}, \\code{test\\_arrays\\_active},\n  \\code{test\\_arrays\\_active\\_pausable}, \\code{test\\_complex\\_arrays}:\n  test many of the array capabilities described in chapter\n  \\ref{chap:arrays}. 
Each of these four executables is compiled from\n  the same source file but with different compiler options in order to\n  test the same array operations but with (a) passive arrays, (b)\n  active arrays, (c) active arrays but with stack recording \"paused\"\n  (see section \\ref{sec:pausable}), and (d) complex arrays.\n\\item\\code{test\\_array\\_speed}: compares the speed of array operations\n  versus the equivalent C-style \\code{for} loop.\n\\item\\code{test\\_radiances\\_array}: as \\code{test\\_radiances} but\n  demonstrates the use of \\code{add\\_derivative\\_dependence} with\n  array arguments.\n\\item\\code{test\\_fixed\\_arrays}, \\code{test\\_fixed\\_arrays\\_active}:\n  tests the functionality of arrays with fixed dimensions, i.e.\\ those\n  known at compile time. The two executables are compiled from the\n  same source file, testing (a) passive arrays and (b) active arrays.\n\\item\\code{test\\_constructors}: test the different ways of\n  constructing, assigning and linking arrays, and passing them to and\n  from functions.\n\\item\\code{test\\_derivatives}: tests that all mathematical functions\n  supported by \\Adept\\ differentiate correctly.\n\\item\\code{test\\_array\\_derivatives}: tests that selected array\n  operations differentiate correctly.\n\\item\\code{test\\_thread\\_safe\\_arrays}: tests two ways to ensure\n  arrays may be accessed and subsetted safely in a multi-threaded\n  environment.\n\\item\\code{test\\_packet\\_operations}: tests that Adept's use of Intel\n  or ARM intrinsics to accelerate vector operations leads to identical\n  output to the equivalent scalar code.\n\\item\\code{test\\_fastexp}: tests the correctness of Adept's fast\n  exponential function.\n\\item\\code{test\\_reduce\\_active}: tests the correctness of the\n  differentiation of reduction operations (\\code{sum}, \\code{product},\n  \\code{maxval} etc).\n\\item\\code{test\\_minimizer}: tests Adept's minimization capabilities\n  on the N-dimensional Rosenbrock 
banana function. Different\n  dimensionality and minimization algorithms can be used, but by\n  default the Levenberg-Marquardt minimizer is used with the\n  2-dimensional Rosenbrock function.\n\\end{enumerate}\n\nTo compile source files that use the \\Adept\\ library, you need to make\nsure that \\code{adept.h} and \\code{adept\\_arrays.h} are in your\ninclude path. If they are located in a directory that is not in the\ndefault include path, add something like \\code{-I/home/fred/include}\nto the compiler command line. At the linking stage, add \\code{-ladept}\nto the command line to tell the linker to look for the\n\\code{libadept.a} static library, or equivalent shared library. If\nthis file is in a non-standard location, also add something like\n\\code{-L/home/fred/lib -Wl,-rpath,/home/fred/lib} before the\n\\code{-ladept} argument to specify its location. Section\n\\ref{sec:multipleobjects} provides an example Makefile for compiling\ncode that uses the \\Adept\\ library. Read on to see how you can compile\nan \\Adept\\ application \\emph{without} needing to link to a library.\n\n\\subsection{Non-Unix platforms, and compiling \\Adept\\ applications\n  without linking to an external library}\n\\label{sec:non-unix}\n\nMost of the difficulty in maintaining software that can compile on\nmultiple platforms arises from the different ways of compiling\nsoftware libraries, and the need to test on compilers that may be\nproprietary.  Unfortunately I don't have the time to maintain versions\nof \\Adept\\ that build specifically on Microsoft Windows or other\nnon-Unix platforms.  However, \\Adept\\ is not a large library, so I\nhave provided a very simple way to build an \\Adept\\ application\n\\emph{without} the need to link to a pre-compiled \\Adept\\ library. 
In\none of your source files and one only, add this near the top:\n\\begin{lstlisting}\n #include <adept_source.h>\n\\end{lstlisting}\nTypically you would include this in the source file containing the\n\\code{main} function.  This header file is simply a concatenation of\nthe \\Adept\\ library source files, so when you compile a file that\nincludes it, you compile in all the functionality of the\n\\Adept\\ library. All other source files in your application should\ninclude only the \\code{adept.h} or \\code{adept\\_arrays.h} header file\nas normal.  When you link all your object files together to make an\nexecutable, the \\Adept\\ functionality will be built in, even though\nyou did not link to an external \\Adept\\ library.\n\nBy default, \\code{adept\\_arrays.h} does not enable BLAS (needed for\nmatrix multiplication) or LAPACK (needed for matrix inversion and\nsolving linear systems of equations); to enable either BLAS alone, or\nboth BLAS and LAPACK, uncomment the lines near the top of\n\\code{adept\\_source.h} defining \\code{HAVE\\_BLAS} and\n\\code{HAVE\\_LAPACK}, and link against functioning BLAS and LAPACK\nlibraries. A demonstration of the use of \\code{adept\\_source.h} is in\nthe \\code{test/test\\_no\\_lib.cpp} source file, which needs to be\ncompiled together with \\code{test/algorithm.cpp} to make an\nexecutable.\n%\nIt is hoped that this feature will make it easy to use \\Adept\\ on\nnon-Unix platforms, although of course this feature works just as well\non Unix-like platforms as well.\n%  If you want to use OpenBLAS on such\n%platforms then you will still need to install that library in the\n%normal way.%\n\nA further point to note is that, under the terms of the license, it is\npermitted to copy all the \\Adept\\ include files, including\n\\code{adept\\_source.h}, into an include directory in your software\npackage and use it from there in both binary and source-code releases\nof your software. 
This means that users do not need to install\n\\Adept\\ separately before they use your software.  However, if you do\nthis then remember that your use of these files must comply with the\nterms of the Apache License, Version 2.0; see section\n\\ref{sec:license} for details.\n%\n\\chapter{Using \\Adept\\ for automatic differentiation}\n\\label{chap:ad}\n%\n\\section{Introduction}\n\\label{sec:ad_functionality}\nThis chapter describes how to use \\Adept\\ to differentiate your code.\nFor simplicity, none of the examples use array functionality described\nin the next chapter. \\Adept\\ provides the following\nautomatic-differentiation functionality:\n%\n\\begin{description}\n\\item[Full Jacobian matrix] Given the non-linear function $\\y=f(\\x)$\n  relating vector $\\y$ to vector $\\x$ coded in C or C++, after a\n  little code modification \\Adept\\ can compute the Jacobian matrix\n  $\\H=\\partial\\y/\\partial\\x$, where the element at row $i$ and column $j$ of\n  $\\H$ is $H_{i,j}=\\partial y_i/\\partial x_j$. This matrix will be\n  computed much more rapidly and accurately than if you simply\n  recompute the function multiple times, each time perturbing a\n  different element of $\\x$ by a small amount. The Jacobian matrix is\n  used in the Gauss-Newton and Levenberg-Marquardt minimization\n  algorithms.\n\\item[Reverse-mode differentiation] This is a key component in\n  optimization problems where a non-linear function needs to be\n  minimized but the state vector $\\x$ is too large for it to make\n  sense to compute the full Jacobian matrix. Atmospheric data\n  assimilation is the canonical example in the field of\n  meteorology. Given a non-linear function $J(\\x)$ relating the\n  scalar to be minimized $J$ to vector $\\x$, \\Adept\\ will compute the\n  vector of adjoints $\\partial J/\\partial\\x$. 
Moreover, for a\n  component of the code that may be expressed as a multi-dimensional\n  non-linear function $\\y=f(\\x)$, \\Adept\\ can compute $\\partial\n  J/\\partial\\x$ if it is provided with the vector of input adjoints\n  $\\partial J/\\partial\\y$.  In this case, $\\partial J/\\partial\\x$ is\n  equal to the matrix-vector product $\\H\\T\\partial J/\\partial\\y$, but\n  it is computed here without computing the full Jacobian matrix\n  $\\H$. The vector $\\partial J/\\partial\\x$ may then be used in a\n  quasi-Newton minimization scheme \\cite[e.g.,][]{Liu+1989}.\n\\item[Forward-mode differentiation] Given the non-linear function\n  $\\y=f(\\x)$ and a vector of perturbations $\\delta\\x$, \\Adept\\ will\n  compute the corresponding vector $\\delta\\y$ arising from a\n  linearization of the function $f$. Formally, $\\delta\\y$ is equal\n  to the matrix-vector product $\\H\\delta\\x$, but it is computed here\n  without computing the full Jacobian matrix $\\H$. Note that\n  \\Adept\\ is designed for the reverse case, so might not be as fast\n  or economical in memory in the forward mode as libraries written\n  especially for that purpose (although Hogan, 2014, showed that it\n  was competitive).\n\\end{description}%\n%\n\\Adept\\ can automatically differentiate the following\noperators and functions:\n\\begin{itemize}\n\\item The standard binary mathematical operators \\code{+}, \\code{-},\n  \\code{*} and \\code{/}.\n\\item The assignment versions of these operators:\n  \\code{+=}, \\code{-=}, \\code{*=} and \\code{/=}.\n\\item The unary mathematical functions \\code{sqrt}, \\code{exp},\n  \\code{log}, \\code{log10}, \\code{sin}, \\code{cos}, \\code{tan},\n  \\code{asin}, \\code{acos}, \\code{atan}, \\code{sinh}, \\code{cosh},\n  \\code{tanh}, \\code{abs}, \\code{asinh}, \\code{acosh}, \\code{atanh},\n  \\code{expm1}, \\code{log1p}, \\code{cbrt}, \\code{erf}, \\code{erfc},\n  \\code{exp2}, \\code{log2}, \\code{round}, \\code{trunc}, \\code{rint}\n  and 
\\code{nearbyint},\n\\item The binary functions \\code{pow}, \\code{atan2}, \\code{min},\n  \\code{max}, \\code{fmin} and \\code{fmax}.\n\\end{itemize}\nVariables to take part in expressions to be differentiated have a\nspecial ``active'' type; such variables can take part in comparison\noperations \\code{==}, \\code{!=}, \\code{>}, \\code{<}, \\code{>=} and\n\\code{<=}, as well as the diagnostic functions \\code{isfinite},\n\\code{isinf} and \\code{isnan}.\n\nNote that at present \\Adept\\ is missing some functionality that you may\nrequire:\n\n\\begin{itemize}\n\\item Differentiation is first-order only: it cannot directly compute\n  higher-order derivatives such as the Hessian matrix, although\n  section \\ref{sec:optimize} describes how \\Adept\\ can help compute\n  the approximate Hessian if the cost function (also known as the\n  penalty function or objective function) is in a particular commonly\n  used form.\n\\item It has limited support for complex numbers; no support for\n  mathematical functions of complex numbers, and expressions involving\n  operations (addition, subtraction, multiplication and division) on\n  complex numbers are not optimized.\n\\item It can be applied to C and C++ only; \\Adept\\ could not be\n  written in Fortran since the language provides no template\n  capability.\n\\end{itemize}%\n%\nIt is hoped that future versions will remedy these limitations (and\nmaybe even a future version of Fortran will support templates).\n\nSection \\ref{sec:preparation} describes how to prepare your code for\nautomatic differentiation, and section \\ref{sec:adjoint} describes how\nto perform forward- and reverse-mode automatic differentiation on this\ncode. Section \\ref{sec:jacobian} describes how to compute Jacobian\nmatrices. Section \\ref{sec:realworld} provides a detailed description\nof how to interface an algorithm implemented using \\Adept\\ with a\nthird-party minimization library.  
Section \\ref{sec:withwithout}\ndescribes how to call a function both with and without automatic\ndifferentiation from within the same program. Section\n\\ref{sec:interfacehandcoded} describes how to interface to software\nmodules that compute their own Jacobians.  Section \\ref{sec:stack}\ndescribes the user-oriented member functions of the \\code{Stack} class\nthat contains the differential information and section\n\\ref{sec:adouble} describes the member functions of the ``active''\ndouble-precision type \\code{adouble}.\n\n\n\\section{Code preparation}\n\\label{sec:preparation}\nIf you have used ADOL-C, CppAD or Sacado then you will already be\nfamiliar with what is involved in applying an operator-overloading\nautomatic differentiation package to your code. The user interface to\n\\Adept\\ differs from these only in the detail. It is assumed that you\nhave an algorithm written in C or C++ that you wish to\ndifferentiate. This section deals with the modifications needed to\nyour code, while section \\ref{sec:adjoint} describes the small\nadditional amount of code you need to write to differentiate it.\n\nIn all source files containing code to be differentiated, you need to\ninclude the \\code{adept.h} header file and import the \\code{adouble}\ntype from the \\code{adept} namespace. Assuming your code uses double\nprecision, you then search and replace \\code{double} with the\n``active'' equivalent \\code{adouble}, but doing this only for those\nvariables whose values depend on the independent input variables.\nUnder the hood this type is an alias for \\code{Active<double>}.  The\nsingle-precision equivalent is \\code{afloat}, an alias for\n\\code{Active<float>}.  
Active and passive variables of single and\ndouble precision may be used together in the same expressions, but\nnote that by default all differentiation is done in double precision.\n\nIf you wish to enable your code to be easily recompiled to use\ndifferent precisions, then you may alternatively use the generic\n\\code{Real} type from the \\code{adept} namespace with its active\nequivalent \\code{aReal} (an alias for \\code{Active<Real>}). Section\n\\ref{sec:configuring} describes how to redefine \\code{Real} to\nrepresent single, double or quadruple precision.  Automatic\ndifferentiation will be performed using the same precision as\n\\code{Real}, but be aware that if this is defined to be the same\nas a single-precision \\code{float}, accumulation of round-off error\ncan make the accuracy of derivatives insufficient for minimization\nalgorithms. The examples in the remainder of this chapter use only\ndouble precision.\n\nConsider the following contrived algorithm from \\cite{Hogan2014} that\ntakes two inputs and returns one output:\n\n\\begin{lstlisting}\n double algorithm(const double x[2]) {\n   double y = 4.0;\n   double s = 2.0*x[0] + 3.0*x[1]*x[1];\n   y *= sin(s);\n   return y;\n }\n\\end{lstlisting}\n\n\\noindent The modified code would look like this:\n\n\\begin{lstlisting}\n #include <adept.h>\n using adept::adouble;\n\n adouble algorithm(const adouble x[2]) {\n   adouble y = 4.0;\n   adouble s = 2.0*x[0] + 3.0*x[1]*x[1];\n   y *= sin(s);\n   return y;\n }\n\\end{lstlisting}\n\n\\noindent Changes like this need to be done in all source files that\nform part of an algorithm to be differentiated. \n\nIf you need to access the real number underlying an \\code{adouble}\nvariable \\code{a}, for example in order to use it as an argument to\nthe \\code{fprintf} function, then use \\code{a.value()} or\n\\code{adept::value(a)}. 
Any mathematical operations performed on\nthis real number will not be differentiated.\n\nYou may use \code{adouble} as the template argument of a Standard\nTemplate Library (STL) vector type (i.e.  \code{std::vector\textless\n  adouble\textgreater}), or indeed any container where you access\nindividual elements one by one. For types allowing mathematical\noperations on the whole object, such as the STL \code{complex} and\n\code{valarray} types, you will find that although you can multiply\none \code{std::complex\textless adouble\textgreater} or\n\code{std::valarray\textless adouble\textgreater} object by another,\nmathematical functions (\code{exp}, \code{sin} etc.) will not work\nwhen applied to whole objects, and neither will some simple operations\nsuch as multiplying these types by an ordinary (non-active)\n\code{double} variable.  Moreover, the performance is not great\nbecause expressions cannot be fully optimized when in these\ncontainers.  Therefore if you need array functionality then you should\nuse the features described in chapter \ref{chap:arrays}.  
It is hoped\nthat a future version of \\Adept\\ will include its own complex type.\n\n\\section{Applying reverse-mode differentiation}\n\\label{sec:adjoint}\n\nSuppose you wanted to create a version of \\code{algorithm} that\nreturned not only the result but also the gradient of the result with\nrespect to its inputs, you would do this:\n\n\\begin{lstlisting}\n #include <adept.h>\n double algorithm_and_gradient(\n                     const double x_val[2], // Input values\n                     double dy_dx[2]) {     // Output gradients\n   adept::Stack stack;                      // Where the derivative information is stored\n   using adept::adouble;                    // Import adouble from adept\n   adouble x[2] = {x_val[0], x_val[1]};     // Initialize active input variables\n   stack.new_recording();                   // Start recording\n   adouble y = algorithm(x);                // Call version overloaded for adouble args\n   y.set_gradient(1.0);                     // Defines y as the cost function \n   stack.compute_adjoint();                 // Run the adjoint algorithm\n   dy_dx[0] = x[0].get_gradient();          // Store the first gradient\n   dy_dx[1] = x[1].get_gradient();          // Store the second gradient\n   return y.value();                        // Return the result of the simple computation\n }\n\\end{lstlisting}\n%\nThe component parts of this function are in a specific order, and if\nthis order is violated then the code will not run correctly. The steps\nare now described.\n%\n\\subsection{Set-up stack to record derivative information}\n\\label{sec:stack_setup}\n\\begin{lstlisting}\n adept::Stack stack;\n\\end{lstlisting}\nThe \\code{Stack} object is where the differential version of the\nalgorithm will be stored. When initialized, it makes itself accessible\nto subsequent statements via a global variable, but using thread-local\nstorage to ensure thread safety. 
\\emph{It must be initialized before\n  the first \\code{adouble} object is instantiated and it must not go\n  out of scope until the last \\code{adouble} object is destructed.}\nThis is because \\code{adouble} objects register themselves with the\ncurrently active stack, and deregister themselves when they are\ndestroyed; if the same stack is not active throughout the lifetime of\nsuch \\code{adouble} objects then the code will crash with a\nsegmentation fault.\n\nIn the example here, the \\code{Stack} object is local to the scope of\nthe function. If another \\code{Stack} object had been initialized by\nthe calling function and so was active at the point of entry to the\nfunction, then the local \\code{Stack} object would throw an\n\\code{adept::stack\\_already\\_active} exception. See Test 3 described\nat \\code{test/README} in the \\Adept\\ package if you want to use\nmultiple \\code{Stack} objects in the same program: the relevant source\ncode is in \\code{test/simulate\\_radiances.cpp}, which temporarily\ndeactivates the existing \\code{Stack} objects in order that the local\none can run.  A disadvantage of local \\code{Stack} objects is that the\nmemory it uses must be reallocated each time the function is called.\nThis can be overcome in several ways:\n\\begin{itemize}\n\\item Declare the \\code{Stack} object to be \\code{static}, which means\n  that it will persist between function calls. This has the\n  disadvantage that you won't be able to use other \\code{Stack}\n  objects in the program without deactivating this one first (see \\code{test\\_radiances} in the \\Adept\\ package, referred to above, for how to do this).\n\\item Initialize \\code{Stack} at a higher level in the program. 
If you\n  need access to the stack, you may either pass a reference to it to\n  functions such as \\code{algorithm\\_and\\_gradient}, or alternatively\n  you can use the \\code{adept::active\\_stack()} function to return a\n  pointer to the currently active stack object.\n\\item Put it in a class so that it is accessible to member functions;\n  this approach is demonstrated in section \\ref{sec:realworld}.\n\\end{itemize}\n%\n\\subsection{Initialize independent variables and start recording}\n\\begin{lstlisting}\n adouble x[2] = {x_val[0], x_val[1]};\n stack.new_recording();\n\\end{lstlisting}\nThe first line here simply copies the input values to the algorithm\ninto \\code{adouble} variables. These are the \\emph{independent\n  variables}, but note that there is no obligation for these to be\nstored as one array (as in CppAD), and for forward- and reverse-mode\nautomatic differentiation you do not need to tell \\Adept\\ explicitly\nvia a function call which variables are the independent ones. The next\nline clears all differential statements from the stack so that it is\nready for a new recording of differential information.\n%\nNote that the first line here actually stores two differential\nstatements, $\\delta$\\code{x[0]=0} and $\\delta$\\code{x[1]=0}, which are\nimmediately cleared by the \\code{new\\_recording} function call.  To\navoid the small overhead of storing redundant information on the\nstack, we could replace the first line with \n\\begin{lstlisting}\n x[0].set_value(x_val[0]);\n x[1].set_value(x_val[1]);\n\\end{lstlisting}\nor\n\\begin{lstlisting}\n adept::set_values(x, 2, x_val);\n\\end{lstlisting}\nwhich have the effect of setting the values of \\code{x} without storing\nthe equivalent differential statements.\n\nPrevious users of \\Adept\\ version 0.9 should note that since version\n1.0, the \\code{new\\_recording} function replaces the \\code{start}\nfunction call, which had to be put \\emph{before} the independent\nvariables were initialized.  
The problem with this was that the\nindependent variables had to be initialized with the \\code{set\\_value}\nor \\code{set\\_values} functions, otherwise the gradients coming out of\nthe automatic differentiation would all be zero.  Since it was easy to\nforget this, \\code{new\\_recording} was introduced to allow the\nindependent variables to be assigned in the normal way using the\nassignment operator (\\code{=}).  But don't just replace \\code{start}\nin your version-0.9-compatible code with \\code{new\\_recording}; the\nlatter must appear \\emph{after} the independent variables have been\ninitialized.\n\n\\subsection{Perform calculations to be differentiated}\n\\begin{lstlisting}\n adouble y = algorithm(x);\n\\end{lstlisting}\nThe algorithm is called, and behind the scenes the equivalent\ndifferential statement for every mathematical statement is stored in the\nstack. The result of the forward calculation is stored in \\code{y},\nknown as a dependent variable. This example has one dependent\nvariable, but any number is allowed, and they could be returned in\nanother way, e.g. by passing a non-constant array to algorithm that is\nfilled with the final values when the function returns.\n%\n\\subsection{Perform reverse-mode differentiation}\n\n\\begin{lstlisting}\n y.set_gradient(1.0);\n stack.compute_adjoint();\n\\end{lstlisting}\nThe first line sets the initial gradient (or adjoint) of \\code{y}. In\nthis example, we want the output gradients to be the derivatives of\n\\code{y} with respect to each of the independent variables; to achieve\nthis, the initial gradient of \\code{y} must be unity.\n\nMore generally, if \\code{y} was only an intermediate value in the\ncomputation of cost function $J$, then for the outputs of the\nfunction to be the derivatives of $J$ with respect to each of the\nindependent variables, we would need to set the gradient of\n\\code{y} to $\\partial J/\\partial$\\code{y}. 
In the case of multiple\nintermediate values, a separate call to \\code{set\\_gradient} is needed\nfor each intermediate value.  If \\code{y} was an array of length\n\\code{n} then the gradient of each element could be set to the values in a \\code{double} array \\code{y\\_ad} using\n\\begin{lstlisting}\n adept::set_gradients(y, n, y_ad);\n\\end{lstlisting}\n\nThe \\code{compute\\_adjoint()} member function of stack performs the\nadjoint calculation, sweeping in reverse through the differential\nstatements stored on the stack. Note that this must be preceded by at\nleast one \\code{set\\_gradient} or \\code{set\\_gradients} call, since\nthe first such call initializes the list of gradients for\n\\code{compute\\_adjoint()} to act on. Otherwise,\n\\code{compute\\_adjoint()} will throw a\n\\code{gradients\\_not\\_initialized} exception. \n\n\\subsection{Extract the final gradients}\n\n\\begin{lstlisting}\n dy_dx[0] = x[0].get_gradient();\n dy_dx[1] = x[1].get_gradient();\n\\end{lstlisting}\nThese lines simply extract the gradients of the cost function\nwith respect to the two independent variables. Alternatively we could\nhave extracted them simultaneously using\n\\begin{lstlisting}\n adept::get_gradients(x, 2, dy_dx);\n\\end{lstlisting}\n\nTo do forward-mode differentiation in this example would involve\nsetting the initial gradients of \\code{x} instead of \\code{y}, calling\nthe member function \\code{compute\\_tangent\\_linear()} instead of\n\\code{compute\\_adjoint()}, and extracting the final gradients from\n\\code{y} instead of \\code{x}.\n\n\\section{Computing Jacobian matrices}\n\\label{sec:jacobian}\nUntil now we have considered a function with two inputs and one\noutput.  
Consider the following more general function whose declaration\nis\n\\begin{lstlisting}\n void algorithm2(int n, const adouble* x, int m, adouble* y);\n\\end{lstlisting}\nwhere \\code{x} points to the \\code{n} independent (input) variables\nand \\code{y} points to the \\code{m} dependent (output) variables. The\nfollowing function would return the full Jacobian matrix:\n%\n\\begin{lstlisting}\n #include <vector>\n #include <adept.h>\n void algorithm2_jacobian(\n                     int n,                 // Number of input values\n                     const double* x_val,   // Input values\n                     int m,                 // Number of output values\n                     double* y_val,         // Output values\n                     double* jac) {         // Output Jacobian matrix\n   using adept::adouble;                    // Import Stack and adouble from adept\n   adept::Stack stack;                      // Where the derivative information is stored\n   std::vector<adouble> x(n);               // Vector of active input variables\n   adept::set_values(&x[0], n, x_val);      // Initialize adouble inputs\n   stack.new_recording();                   // Start recording\n   std::vector<adouble> y(m);               // Create vector of active output variables\n   algorithm2(n, &x[0], m, &y[0]);          // Run algorithm\n   stack.independent(&x[0], n);             // Identify independent variables\n   stack.dependent(&y[0], m);               // Identify dependent variables\n   stack.jacobian(jac);                     // Compute & store Jacobian in jac\n   for (int iy = 0; iy < m; ++iy) \n     y_val[iy] = y[iy].value();             // Extract value from active object \n }\n\\end{lstlisting}\n%\nNote that:\n\\begin{itemize}\n\\item The \\code{independent} member function of stack is used to\n  identify the independent variables, i.e.\\ the variables that the\n  derivatives in the Jacobian matrix will be with respect to. 
In this\n  example there are \code{n} independent variables located together in\n  memory and so can be identified all at once. Multiple calls are\n  possible to identify further independent variables.  To identify a\n  single independent variable, call \code{independent} with just one\n  argument, the independent variable (not as a pointer). \n\item The \code{dependent} member function of stack identifies the\n  dependent variables, and its usage is identical to\n  \code{independent}.\n\item The memory provided to store the Jacobian matrix (pointed to by\n  \code{jac}) must be a one-dimensional array of size\n  \code{m}$\times$\code{n}, where \code{m} is the number of dependent\n  variables and \code{n} is the number of independent variables.\n\item The resulting matrix is stored in the sense of the index\n  representing the dependent variables varying fastest (column-major\n  order).\n% To get row-major order, call the \code{jacobian} function\n%  with a second argument of \code{true} (see section \ref{sec:stack}).\n\item Internally, the Jacobian calculation is performed by multiple\n  forward or reverse passes, whichever would be faster (dependent on\n  the numbers of independent and dependent variables).\n\item The use of \code{std::vector<adouble>} rather than \code{new\n  adouble[n]} ensures no memory leaks in the case of an exception being\n  thrown, since the memory associated with \code{x} and \code{y} will\n  be automatically deallocated when they go out of scope.\n\end{itemize}%\n\nAs described in chapter \ref{chap:arrays}, \Adept\ version 2.0\nintroduced built-in multi-dimensional arrays of both active\n(e.g.\ \code{aVector}) and passive (e.g.\ \code{Vector}) variables. 
It\ntherefore seems more natural to express the algorithm above in terms\nof these objects, which could be done as follows:\n\n\begin{lstlisting}\n #include <adept_arrays.h>\n\n // Adept vectors know their own length, so lengths do not need to be\n // passed in as well\n adept::aVector algorithm2(const adept::aVector& x);\n\n void algorithm2_jacobian(\n                const adept::Vector& x_val, // Input values\n                adept::Vector& y_val,       // Output values (correctly sized or empty)\n                adept::Matrix& jac) {       // Output Jacobian matrix (correctly sized)\n   adept::Stack stack;                      // Where the derivative information is stored\n   adept::aVector x = x_val;                // Active vector of inputs\n   stack.new_recording();                   // Start recording\n   adept::aVector y = algorithm2(x);        // Run algorithm and store outputs\n   stack.independent(x);                    // Identify independent variables\n   stack.dependent(y);                      // Identify dependent variables\n   stack.jacobian(jac);                     // Compute & store Jacobian (since Adept 2.0.8)\n   // If jac is empty we can automatically resize it using this instead (since Adept 2.0.8):\n   //jac = stack.jacobian();\n   y_val = value(y);                        // Extract the values from the active array\n }\n\end{lstlisting}\n\n\section{Real-world usage: interfacing \Adept\ to a third-party minimization library}\n\label{sec:realworld}\nSuppose we want to find the vector $\x$ that minimizes a cost\nfunction $J(\x)$ that consists of a large algorithm coded using the\n\Adept\ library and encapsulated within a C++ class.  In this section\nwe illustrate how it may be interfaced to a third-party minimization\nalgorithm with a C-style interface, specifically the free one in the\nGNU Scientific Library.  
Note that since version 2.0.8,\n\\Adept\\ provides its own minimization functionality, as described in\nchapter \\ref{chap:optimize}.\n\nThe full working version of this example, using the N-dimensional\nRosenbrock banana function as the function to be minimized, is in\n\\code{test/test\\_gsl\\_interface.cpp} of the \\Adept\\ software package\n(see the description of Test 4 in \\code{test/README}). The interface\nto the algorithm is as follows:\n%\n\\begin{lstlisting}\n #include <vector>\n #include <adept.h>\n using adept::adouble;\n class State {\n  public:\n    // Construct a state with n state variables\n    State(int n) { active_x_.resize(n); x_.resize(n); }\n    // Minimize the function, returning true if minimization successful, false otherwise\n    bool minimize();\n    // Get copy of state variables after minimization\n    void x(std::vector<double>& x_out) const;\n    // For input state variables x, compute the function J(x) and return it\n    double calc_function_value(const double* x);\n    // For input state variables x, compute function and put its gradient in dJ_dx\n    double calc_function_value_and_gradient(const double* x, double* dJ_dx);\n    // Return the size of the state vector\n    unsigned int nx() const { return active_x_.size(); }\n  private:\n    // Active version: the algorithm is contained in the definition of this function\n    adouble calc_function_value(const adouble* x);\n    // DATA\n    adept::Stack stack_;             // Adept stack object (must be before active state\n                                     // variables, e.g. adouble, in class definition)\n    std::vector<adouble> active_x_;  // Active state variables (must be after Stack)\n };\n\\end{lstlisting}\n%\nThe algorithm itself is contained in the definition of\n\\code{calc\\_function\\_value(const adouble*)}, which is implemented using\n\\code{adouble} variables (following the rules in section\n\\ref{sec:preparation}). 
However, the public interface to the class\nuses only standard \\code{double} types, so the use of \\Adept\\ is\nhidden to users of the class.  Of course, a complicated algorithm may\nbe implemented in terms of multiple classes that do exchange data via\n\\code{adouble} objects. We will be using a quasi-Newton minimization\nalgorithm that calls the algorithm many times with trial vectors $\\x$,\nand for each call may request not only the value of the function, but\nalso its gradient with respect to $\\x$. Thus the public interface\nprovides \\code{calc\\_function\\_value(const double*)} and\n\\code{calc\\_function\\_value\\_and\\_gradient}, which could be implemented as\nfollows:\n%\n\\begin{lstlisting}\n double State::calc_function_value(const double* x) {\n   for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i];\n   stack_.new_recording();\n   return value(calc_function_value(&active_x_[0]));\n }\n\n double State::calc_function_value_and_gradient(const double* x, double* dJ_dx) {\n   for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i];\n   stack_.new_recording();\n   adouble J = calc_function_value(&active_x_[0]);\n   J.set_gradient(1.0);\n   stack_.compute_adjoint();\n   adept::get_gradients(&active_x_[0], nx(), dJ_dx);\n   return value(J);\n }\n\\end{lstlisting}\n%\nThe first function simply copies the \\code{double} inputs into an\n\\code{adouble} vector and runs the version of\n\\code{calc\\_function\\_value} for \\code{adouble} arguments. Obviously\nthere is an inefficiency here in that gradients are recorded that are\nthen not used, and this function would be typically 2.5--3 times\nslower than an implementation of the algorithm that did not store\ngradients.  Section \\ref{sec:withwithout} describes three ways to\novercome this problem.  
The second function above implements\nreverse-mode automatic differentiation as described in section\n\\ref{sec:adjoint}.\n\nThe \\code{minimize} member function could be implemented using GSL as\nfollows:\n%\n\\begin{lstlisting}\n #include <iostream>\n #include <gsl/gsl_multimin.h>\n\n bool State::minimize() {\n   // Minimizer settings\n   const double initial_step_size = 0.01;\n   const double line_search_tolerance = 1.0e-4;\n   const double converged_gradient_norm = 1.0e-3;\n   // Use the \"limited-memory BFGS\" quasi-Newton minimizer\n   const gsl_multimin_fdfminimizer_type* minimizer_type\n     = gsl_multimin_fdfminimizer_vector_bfgs2;\n\n   // Declare and populate structure containing function pointers\n   gsl_multimin_function_fdf my_function;\n   my_function.n = nx();\n   my_function.f = my_function_value;\n   my_function.df = my_function_gradient;\n   my_function.fdf = my_function_value_and_gradient;\n   my_function.params = reinterpret_cast<void*>(this);\n   \n   // Set initial state variables using GSL's vector type\n   gsl_vector *x;\n   x = gsl_vector_alloc(nx());\n   for (unsigned int i = 0; i < nx(); ++i) gsl_vector_set(x, i, 1.0);\n\n   // Configure the minimizer\n   gsl_multimin_fdfminimizer* minimizer\n     = gsl_multimin_fdfminimizer_alloc(minimizer_type, nx());\n   gsl_multimin_fdfminimizer_set(minimizer, &my_function, x,\n                                 initial_step_size, line_search_tolerance);\n   // Begin loop\n   size_t iter = 0;\n   int status;\n   do {\n     ++iter;\n     // Perform one iteration\n     status = gsl_multimin_fdfminimizer_iterate(minimizer);\n\n     // Quit loop if iteration failed\n     if (status != GSL_SUCCESS) break;\n    \n     // Test for convergence\n     status = gsl_multimin_test_gradient(minimizer->gradient, converged_gradient_norm);\n   }\n   while (status == GSL_CONTINUE && iter < 100);\n\n   // Free memory\n   gsl_multimin_fdfminimizer_free(minimizer);\n   gsl_vector_free(x);\n\n   // Return true if 
successfully minimized function, false otherwise\n   if (status == GSL_SUCCESS) {\n     std::cout << \"Minimum found after \" << iter << \" iterations\\n\";\n     return true;\n   }\n   else {\n     std::cout << \"Minimizer failed after \" << iter << \" iterations: \"\n               << gsl_strerror(status) << \"\\n\";\n     return false;\n   }\n }\n\\end{lstlisting}\n%\nThe GSL interface requires three functions to be defined, each of\nwhich takes a vector of state variables $\\x$ as input:\n\\code{my\\_function\\_value}, which returns the value of the function;\n\\code{my\\_function\\_gradient}, which returns the gradient of the\nfunction with respect to $\\x$; and\n\\code{my\\_function\\_value\\_and\\_gradient}, which returns the value and\nthe gradient of the function. These functions are provided to GSL as\nfunction pointers (see above), but since GSL is a C library, we need\nto use the `\\code{extern \"C\"}' specifier in their definition. Thus the\nfunction definitions would be:\n%\n\\begin{lstlisting}\n extern \"C\" \n double my_function_value(const gsl_vector* x, void* params) {\n   State* state = reinterpret_cast<State*>(params);\n   return state->calc_function_value(x->data);\n }\n\n extern \"C\"\n void my_function_gradient(const gsl_vector* x, void* params, gsl_vector* gradJ) { \n   State* state = reinterpret_cast<State*>(params);\n   state->calc_function_value_and_gradient(x->data, gradJ->data);\n }\n\n extern \"C\"\n void my_function_value_and_gradient(const gsl_vector* x, void* params,\n                                     double* J, gsl_vector* gradJ) { \n   State* state = reinterpret_cast<State*>(params);\n   *J = state->calc_function_value_and_gradient(x->data, gradJ->data);\n }\n\\end{lstlisting}\n%\nWhen the \\code{gsl\\_multimin\\_fdfminimizer\\_iterate} function is\ncalled, it chooses a search direction and performs several calls of\nthese functions to approximately minimize the function along this\nsearch direction. 
The \code{this} pointer (i.e.\ the pointer to the\n\code{State} object), which was provided to the \code{my\_function}\nstructure in the definition of the \code{minimize} function above, is\nprovided as the second argument to each of the three functions\nabove. Unlike in C, in C++ this pointer needs to be cast back to a\npointer to a \code{State} type, hence the use of\n\code{reinterpret\_cast}.\n\nThat's it! A call to \code{minimize} should successfully minimize well\nbehaved differentiable multi-dimensional functions.  It should be\nstraightforward to adapt the above to work with other minimization\nlibraries.\n\n\section{Calling an algorithm with and without automatic differentiation from the same program}\n\label{sec:withwithout}\nThe \code{calc\_function\_value(const double*)} member function\ndefined in section \ref{sec:realworld} is sub-optimal in that it\nsimply calls the \code{calc\_function\_value(const adouble*)} member\nfunction, which not only computes the value of the function, it also\nrecords the derivative information of all the operations involved.\nThis information is then ignored. This overhead makes the function\ntypically 2.5--3 times slower than it needs to be, although sometimes\n(specifically for loops containing no transcendental functions) the\ndifference between an algorithm coded in terms of \code{double}s and\nthe same algorithm coded in terms of \code{adouble}s can exceed a\nfactor of 10 \citep{Hogan2014}.  The impact on the computational speed\nof the entire minimization process depends on how many requests are\nmade for the function value only as opposed to the gradient of the\nfunction, and can be significant.  We require a way to avoid the\noverhead of \Adept\ computing the derivative information for calls to\n\code{calc\_function\_value(const double*)}, without having to\nmaintain two versions of the algorithm, one coded in terms of\n\code{double}s and the other in terms of \code{adouble}s. 
The three\nways to achieve this are now described.\n%\n\subsection{Function templates}\n\label{sec:func_templates}\nThe simplest approach is to use a function template for those\nfunctions that take active arguments, as demonstrated in the following\nexample:\n%\n\begin{lstlisting}\n #include <adept.h>\n class State {\n  public:\n    ...\n    template <typename xdouble>\n    xdouble calc_function_value(const xdouble* x);\n    ...\n };\n\n // Example function definition that must be in a header file included\n // by any source file that calls calc_function_value\n template <typename xdouble>\n inline\n xdouble State::calc_function_value(const xdouble* x) {\n   xdouble y = 4.0;\n   xdouble s = 2.0*x[0] + 3.0*x[1]*x[1];\n   y *= sin(s);\n   return y;\n }\n\end{lstlisting}\n%\nThis takes the example from section \ref{sec:preparation} and replaces\n\code{adouble} by the template type \code{xdouble}. Thus,\n\code{calc\_function\_value} can be called with either \code{double}\nor \code{adouble} arguments, and the compiler will compile inline the\ninactive or active version accordingly.  Note that the function\ntemplate need not be a member function of a class.  \n\nThis technique is good if only a small amount of code needs to be\ndifferentiated, but for large models the use of inlining is likely to\nlead to duplication of compiled code leading to large executables and\nlong compile times.  The following two approaches do not have this\ndrawback and are suitable for large codes.\n\n\subsection{Pausable recording}\n\label{sec:pausable}\nThe second method involves compiling the entire code with the\n\code{ADEPT\_RECORDING\_PAUSABLE} preprocessor variable defined, which\ncan be done by adding an argument \code{-DADEPT\_RECORDING\_PAUSABLE}\nto the compiler command line. 
This modifies the behaviour of\nmathematical operations performed on \code{adouble} variables: instead\nof performing the operation and then storing the derivative\ninformation, it performs the operation and then only stores the\nderivative information if the \Adept\ stack is not in the ``paused''\nstate. We then use the following member function definition instead of\nthe one in section \ref{sec:realworld}:\n%\n\begin{lstlisting}\n double State::calc_function_value(const double* x) {\n   stack_.pause_recording();\n   for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i];\n   double J = value(calc_function_value(&active_x_[0]));\n   stack_.continue_recording();\n   return J;\n }\n\end{lstlisting}\n%\nBy pausing the recording for all operations on \code{adouble} objects,\nmost of the overhead of storing derivative information is removed. The\nextra run-time check to see whether the stack is in the paused state,\nwhich is carried out by mathematical operations involving\n\code{adouble} objects, generally adds a small overhead.  However, in\nalgorithms where most of the number crunching occurs in loops\ncontaining no transcendental functions, even if the stack is in the\npaused state, the presence of the check can prevent the compiler from\naggressively optimizing the loop.  In that instance the third method\nmay be preferable.\n%\n\subsection{Multiple object files per source file}\n\label{sec:multipleobjects}\nThe third method involves compiling each source file containing\nfunctions with \code{adouble} arguments twice.  The first time, the\ncode is compiled normally to produce an object file containing\ncompiled functions including automatic differentiation. The second\ntime, the code is compiled with the\n\code{-DADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} flag on the compiler\ncommand line. 
This instructs the \\code{adept.h} header file to turn\noff automatic differentiation by defining the \\code{adouble} type to\nbe an alias of the \\code{double} type. This way, a second set of\nobject files are created containing overloaded versions of the same\nfunctions as the first set but this time without automatic\ndifferentiation. These object files can be compiled together to form\none executable.  In the example presented in section\n\\ref{sec:realworld}, the \\code{calc\\_function\\_value} function would\nbe one that would be compiled twice in this way, once to provide the\n\\code{calc\\_function\\_value(const adouble*)} version and the other to\nprovide the \\code{calc\\_function\\_value(const double*)} version. Note\nthat any functions that do not include \\code{adouble} arguments must\nbe compiled only once, because otherwise the linker will complain\nabout multiple versions of the same function.\n\nThe following shows a Makefile from a hypothetical project that\ncompiles two source files (\\code{algorithm1.cpp} and\n\\code{algorithm2.cpp}) twice and a third (\\code{main.cpp}) once:\n%\n\\begin{lstlisting}[language=make]\n # Specify compiler and flags\n CXX = g++\n CXXFLAGS = -Wall -O3 -g\n # Normal object files to be created\n OBJECTS = algorithm1.o algorithm2.o main.o\n # Object files created with no automatic differentiation\n NO_AD_OBJECTS = algorithm1_noad.o algorithm2_noad.o\n # Program name\n PROGRAM = my_program\n # Include-file location\n INCLUDES = -I/usr/local/include\n # Library location and name, plus the math library\n LIBS = -L/usr/local/lib -lm -ladept\n\n # Rule to build the program (typing \"make\" will use this rule)\n $(PROGRAM): $(OBJECTS) $(NO_AD_OBJECTS)\n         $(CXX) $(CXXFLAGS) $(OBJECTS) $(NO_AD_OBJECTS) $(LIBS) -o $(PROGRAM)\n # Rule to build a normal object file (used to compile all objects in OBJECTS)\n %.o: %.cpp\n         $(CXX) $(CXXFLAGS) $(INCLUDES) -c $<\n # Rule to build a no-automatic-differentiation object (used 
to compile ones in NO_AD_OBJECTS)\n %_noad.o: %.cpp\n         $(CXX) $(CXXFLAGS) $(INCLUDES) -DADEPT_NO_AUTOMATIC_DIFFERENTIATION -c $< -o $@\n\\end{lstlisting}\n%\n\nThere is a further modification required with this approach, which\narises because if a header file declares both the \\code{double} and\n\\code{adouble} versions of a function, then when compiled with\n\\code{-DADEPT\\_NO\\_AUTOMATIC\\_DIFFERENTIATION} it appears to the\ncompiler that the same function is declared twice, leading to a\ncompile-time error.  This can be overcome by using the preprocessor to\nhide the \\code{adouble} version if the code is compiled with this\nflag, as follows (using the example from section \\ref{sec:realworld}):\n%\n\\begin{lstlisting}\n #include <adept.h>\n class State {\n  public:\n    ...\n    double calc_function_value(const double* x);\n  private:\n #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n    adouble calc_function_value(const adouble* x);\n #endif\n    ...\n };\n\\end{lstlisting}\n\nA final nuance is that if the code contains an \\code{adouble} object\n\\code{x}, then \\code{x.value()} will work fine in the compilation when\n\\code{x} is indeed of type \\code{adouble}, but in the compilation when\nit is set to a simple \\code{double} variable, the \\code{value()}\nmember function will not be found.  Hence it is better to use\n\\code{adept::value(x)}, which returns a \\code{double} regardless of\nthe type of \\code{x}, and works regardless of whether the code was\ncompiled with or without the\n\\code{-DADEPT\\_NO\\_AUTOMATIC\\_DIFFERENTIATION} flag.\n\n\\section{Interfacing with software containing hand-coded Jacobians}\n\\label{sec:interfacehandcoded}\nOften a complicated algorithm will include multiple components.\nComponents of the code written in C or C++ for which the source is\navailable are straightforward to convert to using \\Adept, following\nthe rules in section \\ref{sec:preparation}.  
For components written in\nFortran, this is not possible, but if such components have their own\nhand-coded Jacobian then it is possible to interface \\Adept\\ to them.\nMore generally, in certain situations automatic differentiation is\nmuch slower than hand-coding \\cite[see the Lax-Wendroff example\n  in][]{Hogan2014} and we may wish to hand-code certain critical\nparts.  In general the Jacobian matrix is quite expensive to compute,\nso this interfacing strategy makes most sense if the component of the\nalgorithm has a small number of inputs or a small number of outputs. A\nfull working version of the following example is given as ``Test 3''\nin the \\code{test} directory of the \\Adept\\ package (see specifically\n\\code{test/README} and \\code{test/test\\_radiances.cpp}).\n\nConsider the example of a radiative transfer model for simulating\nsatellite microwave radiances at two wavelengths, $I$ and $J$, which\ntakes as input the surface temperature $T_s$ and the vertical profile\nof atmospheric temperature $T$ from a numerical weather forecast\nmodel. Such a model would be used in a data assimilation system to\nassimilate the temperature information from the satellite observations\ninto the weather forecast model. In addition to returning the\nradiances, the model returns the gradient $\\partial I/\\partial T_s$\nand the gradients $\\partial I/\\partial T_i$ for all height layers $i$\nbetween 1 and $n$, and likewise for radiance $J$. 
The interface to the\nradiative transfer model is the following:\n%\n\\begin{lstlisting}\n void simulate_radiances(int n, // Size of temperature array\n                         // Input variables:\n                         double surface_temperature, \n                         const double* temperature,\n                         // Output variables:\n                         double radiance[2],\n                         // Output Jacobians:\n                         double dradiance_dsurface_temperature[2],\n                         double* dradiance_dtemperature);\n\\end{lstlisting}\n%\nThe calling function needs to allocate \\code{2*n} elements for the\ntemperature Jacobian \\code{dradiance\\_dtemperature} to be stored, and\nthe stored Jacobian will be oriented such that the radiance index\nvaries fastest.\n\n\\Adept\\ needs to be told how to relate the radiance perturbations\n$\\delta I$ and $\\delta J$, to perturbations in the input\nvariables, $\\delta T_s$ and $\\delta T_i$ (for all layers\n$i$). 
Mathematically, we wish the following relationship to be stored\nwithin the \\Adept\\ stack:\n%\n\\begin{equation}\n\\delta I = \\frac{\\partial I}{\\partial T_s}\\delta\nT_s+\\sum_{i=1}^n\\frac{\\partial I}{\\partial T_i}\\delta T_i.\\nonumber\n\\end{equation}\n%\nThis is achieved with the following wrapper function, which has\n\\code{adouble} inputs and outputs and therefore can be called from\nwithin other parts of the algorithm that are coded in terms of\n\\code{adouble} objects:\n%\n\\begin{lstlisting}\n void simulate_radiances_wrapper(int n,\n                                 const adouble& surface_temperature,\n                                 const adouble* temperature,\n                                 adouble radiance[2]) {\n   // Create inactive (double) versions of the active (adouble) inputs\n   double st = value(surface_temperature);\n   std::vector<double> t(n);\n   for (int i = 0; i < n; ++i) t[i] = value(temperature[i]);\n\n   // Declare variables to hold the inactive outputs and their Jacobians\n   double r[2];\n   double dr_dst[2];\n   std::vector<double> dr_dt(2*n);\n\n   // Call the non-Adept function\n   simulate_radiances(n, st, &t[0], &r[0], dr_dst, &dr_dt[0]);\n\n   // Copy the results into the active variables, but use set_value in order\n   // not to write any equivalent differential statement to the Adept stack\n   radiance[0].set_value(r[0]);\n   radiance[1].set_value(r[1]);\n\n   // Loop over the two radiances and add the differential statements to the Adept stack\n   for (int i = 0; i < 2; ++i) {\n     // Add the first term on the right-hand-side of Equation 1 in the text\n     radiance[i].add_derivative_dependence(surface_temperature, dr_dst[i]);\n     // Now append the second term on the right-hand-side of Equation 1. 
The third argument\n     // \"n\" of the following function says that there are n terms to be summed, and the fourth \n     // argument \"2\" says to take only every second element of the Jacobian dr_dt, since the \n     // derivatives with respect to the two radiances have been interlaced.  If the fourth \n     // argument is omitted then relevant Jacobian elements will be assumed to be contiguous\n     // in memory.\n     radiance[i].append_derivative_dependence(temperature, &dr_dt[i], n, 2);\n   }\n }\n\\end{lstlisting}\n%\nIn this example, the form of \\code{add\\_derivative\\_dependence} for\none variable on the right-hand-side of the derivative expression has\nbeen used, and the form of \\code{append\\_derivative\\_dependence} for\nan array of variables on the right-hand-side has been used. As\ndescribed in section \\ref{sec:adouble}, both functions have forms that\ntake single variables and arrays as arguments. Note also that the use\nof \\code{std::vector<double>} rather than \\code{new double[n]} ensures\nthat if \\code{simulate\\_radiances} throws an exception, the memory\nallocated to hold \\code{dr\\_dt} will be freed correctly.\n\n\\section{Member functions of the \\codestyle{Stack} class}\n\\label{sec:stack}\nThis section describes the user-oriented member functions of the\n\\code{Stack} class. Some functions have arguments with default values;\nif these arguments are omitted then the default values will be used.\nSome of these functions throw \\Adept\\ exceptions, defined in section\n\\ref{sec:exceptions}.\n\n\\begin{description}\n\\citem{Stack(bool activate\\_immediately = true)} The constructor for the\n\\codebf{Stack} class.  Normally \\codebf{Stack} objects are constructed\nwith no arguments, which means that the object will attempt to make\nitself the currently active stack by placing a pointer to itself into\na global variable.  
If another \\codebf{Stack} object is currently\nactive, then the present one will be fully constructed, left in the\nunactivated state, and an \\code{stack\\_already\\_active} exception\nwill be thrown.  If a \\codebf{Stack} object is constructed with an\nargument ``\\codebf{false}'', it will be started in an unactivated\nstate, and a subsequent call to its member function \\codebf{activate}\nwill be needed to use it.\n%\n\\citem{void new\\_recording()} Clears all the information on the stack\nin order that a new recording can be started. Specifically this\nfunction clears all the differential statements, the list of\nindependent and dependent variables (used in computing Jacobian\nmatrices) and the list of gradients used by the\n\\codebf{compute\\_tangent\\_linear} and \\codebf{compute\\_adjoint} functions.\nNote that this function leaves the memory allocated to reduce the\noverhead of reallocation in the new recordings.\n%\n\\citem{bool pause\\_recording()} Stops recording differential\n  information every time an \\code{adouble} statement is\n  executed. This is useful if within a single program an algorithm\n  needs to be run both with and without automatic\n  differentiation. This option is only effective within compilation\n  units compiled with \\code{ADEPT\\_RECORDING\\_PAUSABLE} defined; if it is,\n  the function returns \\code{true}, otherwise it returns\n  \\code{false}. Further information on using this and the following\n  function are provided in section \\ref{sec:pausable}.\n%\n\\citem{bool continue\\_recording()} Instruct a stack that may have\npreviously been put in a paused state to now continue recording\ndifferential information as normal.  
This option is only effective within\ncompilation units compiled with \code{ADEPT\_RECORDING\_PAUSABLE}\ndefined; if it is, the function returns \code{true}, otherwise it\nreturns \code{false}.\n%\n\citem{bool is\_recording()} Returns \code{false} if recording has\n  been paused with \code{pause\_recording()} and the code has been\n  compiled with \code{ADEPT\_RECORDING\_PAUSABLE} defined.\n  Otherwise returns \code{true}.\n%\n\citem{void compute\_tangent\_linear()} Perform a tangent-linear\ncalculation (forward-mode differentiation) using the stored\ndifferential statements.  Before calling this function you need to call\nthe \code{adouble::set\_gradient} or \code{set\_gradients} function (see\nsection \ref{sec:adouble}) on the independent variables to set the\ninitial gradients, otherwise the function will throw a\n\code{gradients\_not\_initialized} exception. This function is\nsynonymous with \codebf{forward()}.\n%\n\citem{void compute\_adjoint()} Perform an adjoint calculation\n(reverse-mode differentiation) using the stored differential\nstatements.  Before calling this function you need to call the\n\code{adouble::set\_gradient} or \code{set\_gradients} function on the\ndependent variables to set the initial gradients, otherwise the\nfunction will throw a \code{gradients\_not\_initialized}\nexception. This function is synonymous with \codebf{reverse()}.\n%\n\citem{void independent(const adouble\&\ x)} Before computing Jacobian\n  matrices, you need to identify the independent and dependent\n  variables, which correspond to the columns and rows of the Jacobian,\n  respectively. This function adds \codebf{x} to the list of\n  independent variables. If it is the $n$th variable identified in\n  this way, the $n$th column of the Jacobian will correspond to\n  derivatives with respect to \codebf{x}.\n\citem{void dependent(const adouble\&\ y)} Add \codebf{y} to the\n  list of dependent variables.  
If it is the $m$th variable identified\n  in this way, the $m$th row of the Jacobian will correspond to\n  derivatives of \codebf{y} with respect to each of the independent\n  variables.\n\citem{void independent(const adouble* x\_ptr, \Offset\ n)} Add\n  \codebf{n} independent variables to the list, which must be\n  stored consecutively in memory starting at the memory pointed to by\n  \codebf{x\_ptr}.\n\citem{void dependent(const adouble* y\_ptr, \Offset\ n)} Add\n\codebf{n} dependent variables to the list, which must be stored\nconsecutively in memory starting at the memory pointed to by\n\codebf{y\_ptr}.\n%\n\citem{void jacobian(double* jacobian\_out)} Compute the Jacobian matrix, i.e., the gradient of the $m$\ndependent variables (identified with the \codebf{dependent(...)}\nfunction) with respect to the $n$ independent variables (identified\nwith \codebf{independent(...)}). The result is returned in the memory\npointed to by \codebf{jacobian\_out}, which must have been allocated\nto hold $m\times n$ values. The result is stored in\ncolumn-major order, i.e., the $m$ dimension of the matrix varies\nfastest. If no dependents or independents have been identified,\nthen the function will throw a\n\code{dependents\_or\_independents\_not\_identified} exception. In\npractice, this function calls \codebf{jacobian\_forward} if $n\le\nm$ and \codebf{jacobian\_reverse} if $n>m$.\n%\n\citem{void jacobian(Matrix jac)} Compute Jacobian matrix and store in\na correctly sized \Adept\ \code{Matrix} object \codebf{jac}, which may\nbe a subset of a larger matrix. 
See chapter \\ref{chap:arrays} for a\nfull description of \\Adept\\ array objects.\n%\n\\citem{Matrix jacobian()} As above but the Jacobian matrix is returned\nfrom the function.\n%\n\\citem{void jacobian\\_forward(double* jacobian\\_out)} Compute the\nJacobian matrix by executing $n$ forward passes through the stored\nlist of differential statements; this is typically faster than\n\\codebf{jacobian\\_reverse} for $n\\le m$.\n%\n\\citem{void jacobian\\_forward(Matrix jac)} As above but store in a\ncorrectly sized \\Adept\\ \\code{Matrix} object \\codebf{jac}.\n%\n\\citem{Matrix jacobian\\_forward()} As above but the Jacobian matrix is\nreturned from the function.\n%\n\\citem{void jacobian\\_reverse(double* jacobian\\_out)} Compute the\nJacobian matrix by executing $m$ reverse passes through the stored\nlist of differential statements; this is typically faster than\n\\codebf{jacobian\\_forward} for $n>m$.\n%\n\\citem{void jacobian\\_reverse(Matrix jac)} As above but store in a\ncorrectly sized \\Adept\\ \\code{Matrix} object \\codebf{jac}.\n%\n\\citem{Matrix jacobian\\_reverse()} As above but the Jacobian matrix is\nreturned from the function.\n%\n\\citem{void clear\\_gradients()} Clear the gradients set with the\n\\code{set\\_gradient} member function of the \\code{adouble} class. 
This\nenables multiple adjoint and/or tangent-linear calculations to be\nperformed with the same recording.\n%\n\\citem{void clear\\_independents()} Clear the list of independent\nvariables, enabling a new Jacobian matrix to be computed from the same\nrecording but for a different set of independent variables.\n%\n\\citem{void clear\\_dependents()} Clear the list of dependent\nvariables, enabling a new Jacobian matrix to be computed from the same\nrecording but for a different set of dependent variables.\n%\n\\citem{\\Offset\\ n\\_independents()} Return the number of independent\nvariables that have been identified.\n%\n\\citem{\\Offset\\ n\\_dependents()} Return the number of dependent\nvariables that have been identified.\n%\n\\citem{\\Offset\\ n\\_statements()} Return the number of differential\nstatements in the recording.\n%\n\\citem{\\Offset\\ n\\_operations()} Return the total number of operations\nin the recording, i.e the total number of terms on the right-hand-side\nof all the differential statements.\n%\n\\citem{\\Offset\\ max\\_gradients()} Return the number of working gradients\nthat need to be stored in order to perform a forward or reverse pass.\n%\n\\citem{size\\_t memory()} Return the number of bytes currently\nused to store the differential statements and the working\ngradients. Note that this does not include memory allocated but not\ncurrently used.\n%\n\\citem{\\Offset\\ n\\_gradients\\_registered()} Each time an\n\\code{adouble} object is created, it is allocated a unique index that\nis used to identify its gradient in the recorded differential\nstatements. When the object is destructed, its index is freed for\nreuse. 
This function returns the number of gradients currently\nregistered, equal to the number of \\code{adouble} objects currently\ncreated.\n%\n\\citem{void print\\_status(std::ostream\\&\\ os = std::cout)} Print the\ncurrent status of the \\codebf{Stack} object, such as number of\nstatements and operations stored and allocated, to the stream\nspecified by \\codebf{os}, or standard output if this function is\ncalled with no arguments.  Sending the \\codebf{Stack} object to the\nstream using the ``\\code{<<}'' operator results in the same behaviour.\n%\n\\citem{void print\\_statements(std::ostream\\&\\ os = std::cout)} Print\nthe list of differential statements to the specified stream (or\nstandard output if not specified). Each line corresponds to a separate\nstatement, for example ``\\code{d[3] = 1.2*d[1] + 3.4*d[2]}''.\n%\n\\citem{bool print\\_gradients(std::ostream\\&\\ os = std::cout)} Print\nthe vector of gradients to the specified stream (or standard output if\nnot specified). This function returns\n\\code{false} if no \\code{set\\_gradient}\nfunction has been called to set the first gradient and initialize the\nvector, and \\code{true} otherwise. To diagnose what\n\\codebf{compute\\_tangent\\_linear} and \n\\codebf{compute\\_adjoint} are doing, it can be useful to call\n\\codebf{print\\_gradients} immediately before and after.\n%\n\\citem{void activate()} Activate the \\codebf{Stack} object by copying\nits \\code{this} pointer to a global variable that will be accessed by\nsubsequent operations involving \\code{adouble} objects.  If another\n\\codebf{Stack} is already active, a \\code{stack\\_already\\_active}\nexception will be thrown. 
To check whether this is the case before\ncalling \\codebf{activate()}, check that the \\code{active\\_stack()}\nfunction (described below) returns \\code{0}.\n%\n\\citem{void deactivate()} Deactivate the \\codebf{Stack} object by\nchecking whether the global variable holding the pointer to the\ncurrently active \\codebf{Stack} is equal to \\code{this}, and if it is,\nsetting it to \\code{0}.\n%\n\\citem{bool is\\_active()} Returns \\code{true} if the \\codebf{Stack}\nobject is the currently active one, \\code{false} otherwise.\n%\n\\citem{void start()} This function was present in version 0.9 to\nactivate a \\codebf{Stack} object, since in that version they were not\nconstructed in an activated state.  This function has now been\ndeprecated and will always throw a \\code{feature\\_not\\_available}\nexception.\n\\citem{int max\\_jacobian\\_threads()} Return the maximum number of\nOpenMP threads available for Jacobian calculations.  The number will\nbe 1 if either the library was or the current source code is compiled\nwithout OpenMP support (i.e.\\ without the \\code{-fopenmp} compiler and\nlinker flag). (Introduced in \\Adept\\ version 1.1.) \n\\citem{int set\\_max\\_jacobian\\_threads(int n)} Set the maximum number of\nthreads to be used in Jacobian calculations to \\code{n}, if\npossible. A value of 1 indicates that OpenMP will not be used, while a\nvalue of 0 indicates that the maximum available will be used. Returns\nthe maximum that will be used, which may be fewer than requested,\ne.g. 1 if the \\Adept\\ library was compiled without OpenMP\nsupport. (Introduced in \\Adept\\ version 1.1.) \n\\citem{void preallocate\\_statements(int n)} If you know in advance\nroughly how many differential statements will be stored by an\nalgorithm then you may be able to speed-up the first use of the stack\nby preallocating the memory needed to store them.  
More memory will\nstill be allocated if needed, but this should reduce the number of\nallocations and copies.\n\\citem{void preallocate\\_operations(int n)} Likewise, if you know in\nadvance roughly how many operations will be stored then you can\nspeed-up the first use of the stack with this member function.\n\\end{description}\n\n\\noindent The following non-member functions are provided in the\n\\code{adept} namespace:\n\\begin{description}\n\\citem{adept::Stack* active\\_stack()} Returns a pointer to the\ncurrently active \\codebf{Stack} object, or \\code{0} if there is none.\n\\citem{bool is\\_thread\\_unsafe()} Returns \\code{true} if your code has\nbeen compiled with \\code{ADEPT\\_STACK\\_THREAD\\_UNSAFE}, \\code{false}\notherwise.\n%\n\\end{description}\n\n\n\\section{Member functions of the \\codestyle{adouble} object}\n\\label{sec:adouble}\nThis section describes the user-oriented member functions of the\n\\code{adouble} class. Some functions have arguments with default\nvalues; if these arguments are omitted then the default values will be\nused. Some of these functions throw \\Adept\\ exceptions, defined in\nsection \\ref{sec:exceptions}.\n\\begin{description}\n\\citem{double value()} Return the underlying \\code{double} value.\n%\n\\citem{void set\\_value(double x)} Set the value of the \\codebf{adouble}\nobject to \\codebf{x}, without storing the equivalent differential\nstatement in the currently active stack.\n%\n\\citem{void set\\_gradient(const double\\&\\ gradient)} Set the\ngradient corresponding to this \\codebf{adouble} variable. 
The first call\nof this function (for any \\codebf{adouble} variable) after a new\nrecording is made also initializes the vector of working gradients.\nThis function should be called for one or more \\codebf{adouble} objects\nafter a recording has been made but before a call to\n\\code{Stack::compute\\_tangent\\_linear()} or\n\\code{Stack::compute\\_adjoint()}.\n%\n\\citem{void get\\_gradient(double\\&\\ gradient)} Set \\codebf{gradient}\nto the value of the gradient corresponding to this \\codebf{adouble}\nobject. This function is used to extract the result after a call to\n\\code{Stack::compute\\_tangent\\_linear()} or\n\\code{Stack::compute\\_adjoint()}. If the \\codebf{set\\_gradient} function\nwas not called since the last recording was made, this function will\nthrow a \\code{gradients\\_not\\_initialized} exception.  The function\ncan also throw a \\code{gradient\\_out\\_of\\_range} exception if new\n\\codebf{adouble} objects were created since the first\n\\codebf{set\\_gradient} function was called.\n%\n\\citem{void add\\_derivative\\_dependence(const adouble\\&\\ r, const\n  double\\&\\ g)} Add a differential statement to the currently active\nstack of the form $\\delta \\codebf{l}=\\codebf{g}\\times\\delta\n\\codebf{r}$, where \\codebf{l} is the \\codebf{adouble} object from which\nthis function is called.  This function is needed to interface to\nsoftware containing hand-coded Jacobians, as described in section\n\\ref{sec:interfacehandcoded}; in this case \\codebf{g} is the gradient\n$\\partial\\codebf{l}/\\partial\\codebf{r}$ obtained from such software.\n%\n\\citem{void append\\_derivative\\_dependence(const adouble\\&\\ r, const\n  double\\&\\ g)} Assuming that the same \\codebf{adouble} object has just\nhad its \\codebf{add\\_derivative\\_dependence} member function called,\nthis function appends ${}+\\codebf{g}\\times\\delta\\codebf{r}$ to the\nmost recent differential statement on the stack.  
If the calling\n\codebf{adouble} object is different, then a \code{wrong\_gradient}\nexception will be thrown. Note that multiple\n\codebf{append\_derivative\_dependence} calls can be made in succession.\n%\n\item[\begin{minipage}{\textwidth}\codesize\texttt{void \nadd\_derivative\_dependence(const adouble* r, const double* g,}\\ \n\mbox{ }\texttt{\hspace{18em}\Offset\ n = 1, \Offset\\n      g\_stride = 1)}\end{minipage}]\n%\nAdd a differential statement to the currently active stack of the form\n$\delta\codebf{l}=\sum_{i=0}^{\codebf{n}-1}\codebf{g[}i\codebf{]}\n\times\delta\codebf{r[}i\codebf{]}$, where \codebf{l} is the \codebf{adouble}\nobject from which this function is called. If the \codebf{g\_stride}\nargument is provided, then the index to the \codebf{g} array will be\n$i\times\codebf{g\_stride}$ rather than $i$.  This is useful if the\nJacobian provided is oriented such that the relevant gradients for\n\codebf{l} are not spaced consecutively.\n%\n\item[\begin{minipage}{\textwidth}\codesize\texttt{void \nappend\_derivative\_dependence(const adouble* rhs, const double* g,}\\ \n\mbox{ }\texttt{\hspace{20em}\Offset\ n = 1, \Offset\\n      g\_stride = 1)}\end{minipage}]\n%\nAssuming that the same \codebf{adouble} object has just called the\n\codebf{add\_derivative\_dependence} function, this function appends\n${}+\sum_{i=0}^{\codebf{n}-1}\codebf{g[}i\codebf{]}\n\times\delta\codebf{r[}i\codebf{]}$ to the most recent differential\nstatement on the stack. If the calling \codebf{adouble} object is\ndifferent, then a \code{wrong\_gradient} exception will be\nthrown. 
The \\codebf{g\\_stride} argument behaves the same way as in the\nprevious function described.\n\\end{description}\n\n\\noindent The following non-member functions are provided in the\n\\code{adept} namespace:\n\\begin{description}\n\\citem{double value(const adouble\\& x)} Returns the underlying\nvalue of \\codebf{x} as a \\codebf{double}. This is useful to enable\n\\codebf{x} to be used in \\code{fprintf} function calls. It is\ngenerally better to use \\codebf{adept::value(x)} rather than\n\\codebf{x.value()}, because the former also works if you compile the\ncode with the \\code{ADEPT\\_NO\\_AUTOMATIC\\_DIFFERENTIATION} flag set,\nas discussed in section \\ref{sec:multipleobjects}.\n%\n\\citem{void set\\_values(adouble* x, \\Offset\\ n, const double* x\\_val)}\nSet the value of the \\codebf{n} \\codebf{adouble} objects starting at\n\\codebf{x} to the values in \\codebf{x\\_val}, without storing the\nequivalent differential statement in the currently active stack.\n%\n\\citem{void set\\_gradients(adouble* x, size\\_t n, const double*\n  gradients)} Set the gradients corresponding to the \\codebf{n}\n\\codebf{adouble} objects starting at \\codebf{x} to the \\codebf{n}\n\\code{double}s starting at \\codebf{gradients}.  This has the same\neffect as calling the \\codebf{set\\_gradient} member function of each\n\\codebf{adouble} object in turn, but is more concise.\n%\n\\citem{void get\\_gradients(const adouble* y, size\\_t n, double*\n  gradients)} Copy the gradient of the \\codebf{n} \\codebf{adouble}\nobjects starting at \\codebf{y} into the \\codebf{n} \\code{double}s\nstarting at \\codebf{gradients}. This has the same effect as calling\nthe \\codebf{get\\_gradient} member function of each \\codebf{adouble} object\nin turn, but is more concise.  
This function can throw a\n\\code{gradient\\_out\\_of\\_range} exception if new \\codebf{adouble}\nobjects were created since the first \\codebf{set\\_gradients} function\nor \\codebf{set\\_gradient} member function was called.\n\\end{description}\n\n\\chapter{Using \\Adept's array functionality}\n\\label{chap:arrays}\n\n\\section{Introduction}\n\\label{sec:array_functionality}\nThe design of \\Adept's array capability and many of the functions is\ninspired to a significant extent by the built-in array support in\nFortran 90 (and later), and a lesser extent by Matlab, although\nimplemented in the ``C++ way'', e.g.\\ default row-major order with all\narray indices starting from zero.  Future additions to the array\ncapability in \\Adept\\ will attempt to reproduce built-in Fortran array\nfunctions if available\\footnote{This decision may puzzle some readers,\n  since Fortran is a dirty word to many C++ users due to the\n  limitations of the FORTRAN 77 language. Many of these limitations\n  were overcome in Fortran 90, whose array functionality in particular\n  is rather well designed. Indeed, the pioneering ``Blitz++'' C++\n  array library \\cite[]{Veldhuizen1995} also reproduces many Fortran\n  array functions. All references to Fortran in this document imply\n  the 1990 (or later) standard.}. This design makes \\Adept\\ a good\nchoice if you have Fortran code that you wish to convert to C++.\n\\Adept\\ provides the following array functionality:\n%\n\\begin{description}\n\\item[Multi-dimensional arrays.]  Standard dynamically sized arrays\n  can have an arbitrary number of dimensions (although indexing and\n  slicing is supported only up to 7), and may refer to non-contiguous\n  areas of memory. See section \\ref{sec:array}.\n\\item[Mathematical operators and functions.] 
\\Adept\\ supports array\n  expressions containing the standard mathematical operators \\code{+},\n  \\code{-}, \\code{*} and \\code{/}, as well as their assignment\n  versions \\code{+=}, \\code{-=}, \\code{*=} and \\code{/=}. When applied\n  to arrays, they work ``element-wise'', applying the same operation\n  to every element of the arrays. \\Adept\\ also supports array\n  operations on all the mathematical functions listed in section\n  \\ref{sec:ad_functionality}. The following operators and functions\n  return boolean array expressions: \\code{==}, \\code{!=}, \\code{>},\n  \\code{<}, \\code{>=} and \\code{<=}, \\code{isfinite}, \\code{isinf} and\n  \\code{isnan}.  See section \\ref{sec:operators}.\n\\item[Array slicing.] There are many ways to produce an array that\n  references a subset of another array, and therefore can be used as\n  an lvalue in a statement. Arrays can be indexed with scalar\n  integers, a contiguous range of integers, a strided range of\n  integers or an arbitrary list of integers.  This is facilitated with\n  ``\\code{\\_\\_}'' (a double underscore) and ``\\code{end}'', such that\n  \\code{A(\\_\\_,end-1)} returns a vector pointing to the penultimate\n  column of matrix \\code{A}. The member function \\code{subset}\n  produces an array pointing to a contiguous subset of the original\n  array, while \\code{diag\\_vector} and \\code{diag\\_matrix} produce\n  arrays pointing to the diagonal of the original array.  \\code{T}\n  produces an array pointing to the transpose of the original array.\n  See section \\ref{sec:slice}.\n\\item[Passing arrays to and from functions.] \\Adept\\ uses a\n  reference-counting approach to implement the storage of array data,\n  enabling multiple array objects to point to the same data, or parts\n  of it in the case of array slices. This makes it straightforward to\n  pass arrays to and from functions without having to perform a deep\n  copy. 
See section \\ref{sec:passing}.\n\\item[Array reduction operations.] The functions \\code{sum},\n  \\code{mean}, \\code{product}, \\code{minval}, \\code{maxval} and\n  \\code{norm2} perform reduction operations that return an array of\n  lower rank to the expression they are applied to. The functions\n  \\code{all} and \\code{any} do the same but for boolean\n  expressions. \\code{count} returns the number of \\code{true} elements\n  in a boolean expression.\n% The function\n%  \\code{find(A)} returns indices to the \\code{true} elements of\n%  \\code{A}. \n  See section \\ref{sec:reduce}.\n\\item[Array expansion operations.] The functions \\code{outer\\_product}\n  and \\code{spread} return an expression of a higher rank than the\n  expression they are applied to. See section \\ref{sec:expand}\n\\item[Conditional operations.] Two convenient ways are provided to\n  perform an operation on an array depending on the result of a\n  boolean expression: \\code{where} and \\code{find}. The statement\n  \\code{A.where(B>0)=C} assigns elements of \\code{C} to elements of\n  \\code{A} whenever the corresponding element of \\code{B} is greater\n  than zero. For vectors only, the same result could be obtained with\n  \\code{A(find(B>0))=C(find(B>0))}. See section \\ref{sec:conditional}.\n\\item[Fixed-size arrays.] \\Adept\\ provides a fixed-size array class\n  with dimensions (up to seven) that are known at compile time. The\n  functionality is very similar to standard dynamic arrays.\n\\item[Special square matrices.] \\Adept\\ uses specific classes for\n  symmetric, triangular and band-diagonal matrices, the latter of\n  which use compressed storage and include diagonal and tridiagonal\n  matrices. Certain operations such as matrix multiplication and solving\n  linear equations are optimized especially for these objects. See\n  section \\ref{sec:square}.\n\\item[Matrix multiplication.] 
Matrix multiplication can be applied to\n  one- and two-dimensional arrays using the \\code{matmul} function, of\n  for extra syntactic sugar, the ``\\code{**}''\n  pseudo-operator. \\Adept\\ uses whatever BLAS (Basic Linear Algebra\n  Subroutines) support is available on your system, including\n  optimized versions for symmetric and band-diagonal matrices. See\n  section \\ref{sec:matmul}.\n\\item[Linear algebra.] \\Adept\\ uses the LAPACK library to invert\n  matrices and solve linear systems of equations. See section\n  \\ref{sec:la}.\n\\item[Array bounds and alias checking.] \\Adept\\ checks at compile time\n  that terms in an array expression accord in rank, and at run time\n  that they accord in the size of each dimension. Run-time alias\n  checking is performed to determine if any objects on the\n  right-hand-side of a statement overlap in memory with the\n  left-hand-side of the statement, making a temporary copy of the\n  right-hand-side if they do. This can be overridden with the\n  \\code{noalias} function. See section \\ref{sec:bounds}.\n\\item[Interoperability with Fortran arrays.] The Fortran 2018 standard\n  enables Fortran's assumed-shape arrays to be passed to and from\n  C/C++. Section \\ref{sec:fortran} describes how they can be treated\n  as \\Adept\\ arrays within C++.\n\\end{description}% \n%\n\n\\section{The \\codestyle{Array} class}\n\\label{sec:array}\nThe bread and butter of array operations is provided by the\n\\code{Array} class template (in the \\code{adept} namespace along with\nall other public types and classes), which has the following declaration:\n\\begin{lstlisting}\n namespace adept {\n   template <int Rank, typename Type = Real, bool IsActive = false>\n   class Array;\n }\n\\end{lstlisting}\nThe first template argument provides the number of dimensions of the\narray and may be 1 or greater, although indexing and slicing is only\nsupported up to 7 dimensions. 
The second argument is the numerical\ntype being stored and can be any simple integer or real number,\nincluding \\code{bool}. The default type is \\code{adept::Real}, which\nis the default floating-point type the \\Adept\\ library has been\ncompiled to use for computing derivatives, and is usually\n\\code{double}. The final argument states whether the array is\n``active'', i.e.\\ whether it participates in the differentiation of an\nalgorithm.\n\nA number of typedefs are provided for the most common types of array:\n\\code{Vector}, \\code{Matrix}, \\code{Array3D} and so on up to\n\\code{Array7D} provide inactive arrays of type \\code{Real} and rank\n1--7. The corresponding active types are \\code{aVector},\n\\code{aMatrix}, \\code{aArray3D} etc. Arrays of other numeric types\nhave the pattern \\code{boolVector}, \\code{intVector},\n\\code{floatVector}, \\code{afloatVector}, and similarly for matrices\nand higher dimensional arrays. If you wanted shortcuts for\nother types you could do the following:\n\\begin{lstlisting}\n typedef adept::Array<4,unsigned int> uintArray4D;\n typedef adept::Array<2,long double,true> alongdoubleMatrix; // Active\n\\end{lstlisting}\n\nAn \\code{Array} with uninitialized elements can be constructed in\nnumerous ways:\n\\begin{lstlisting}\n using namespace adept;\n Vector v;                  // Initialize an empty vector\n Array3D A(3,4,5);          // Initialize a 3x4x5 array (up to 7 arguments possible)\n Matrix M(dimensions(3,4)); // The \"dimensions\" function takes up to 7 arguments\n Matrix N(M.dimensions());  // Make N the same size as M\n\\end{lstlisting}\nIn the remaining code examples it will be assumed that\n\\code{using namespace adept} has already been called.  When new memory\nis needed, the \\code{Array} object creates a \\code{Storage} object\nthat contains the memory needed, and stores pointers to both the\n\\code{Storage} object and the start of the data. 
By default the data\nare accessed in C-style row-major order (i.e.\\ the final index\ncorresponds to the array dimension that varies most rapidly in\nmemory). However, this is flexible since in addition to storing the\nlength of each of its $n$ dimensions, a rank-$n$ \\code{Array} also\nstores $n$ ``offsets'' that define the separation of elements in\nmemory in each dimension. Thus, a 3-by-4 matrix with row-major storage\nwould store offsets of (4,1). The same size matrix would use\ncolumn-major storage simply by storing offsets of (1,3). To make new\narrays use column-major storage, call the following function:\n\\begin{lstlisting}\n set_array_row_major_order(false);\n\\end{lstlisting}\nNote that this does not change the storage of any existing\nobjects. Note also that when array expressions are evaluated, the data\nare requested in row-major order, so the use of column-major arrays\nwill incur a performance penalty.\n\nAn \\code{Array} may also be constructed such that it immediately\ncontains data:\n\\begin{lstlisting}\n Vector v = M(__,0); // Link to a existing array, in this case the first column of M\n Vector v(M(__,0));  // Has exactly the same effect as the previous example\n Matrix N = log(M);  // Initialize with the size and values of a mathematical expression\n\\end{lstlisting}\nIt can be seen from the constructors involving \\code{Vector}s that an\n\\code{Array} can be configured to ``link'' to part of an existing\n\\code{Array}, and modifications to the numbers in one will be seen by\nthe other. This is a very useful feature as it allows slices of an\narray to be passed to functions and modified; see section\n\\ref{sec:slice}. Note that the array or sub-array being linked to must\nbe of the same rank, type and activeness as the linking array.\nInternally, linking is achieved by both the arrays pointing to the\nsame \\code{Storage} object, which itself contains a reference count of\nthe number of arrays pointing to it. 
When an \\code{Array} is\ndestructed the reference count is reduced by one and only if it falls\nto zero will the data get deallocated. This ensures that if the\n\\code{Array} being linked to goes out of scope, the linking\n\\code{Array} will ``steal'' the data.\n\nYou can also make an \\code{Array} point to data not held in a\n\\code{Storage} object, for example in a function whose interface is\nonly in terms of intrinsic C types:\n\\begin{lstlisting}\n double my_norm2(int n, double* ptr) {\n   Vector x(ptr, dimensions(n)); // Create a Vector pointing to existing data\n   return norm2(x);              // Use Adept's L2-norm function\n }\n\\end{lstlisting}\nThe \\code{Vector} in this example can be used in the same way as any\nother array, but relies on the existing data not being deallocated for\nthe lifetime of the \\code{Vector}.\n\nAfter it has been constructed, an \\code{Array} can be resized,\nrelinked or cleared completely as follows:\n\\begin{lstlisting}\n M.resize(5,2);            // Works up to 7 dimensions\n M.resize(dimension(5,2)); // As above\n N.resize(M.dimensions()); // Resize N to be the same size as M\n v.link(M(end-1,__));      // Size of v set to that of the argument and link to data\n v >>= M(end-1,__);        // Convenient syntax for linking, similar to Fortran's \"->\"\n M.clear();                // Returns array to original empty state\n\\end{lstlisting}\nThe member functions \\code{resize} and \\code{clear} unlink from any\nexisting data, which involves deallocation if no other array is\npointing to the same data. If the \\code{link} function, or the\nalternative ``\\code{>>=}'' syntax, is applied with a non-empty array\non the left-hand-side then the existing data will be quietly cleared\nbefore linking to the new data. Note that if you assign one array to\nanother (e.g.\\ \\code{N=M}), then they must be of the same size; if\nthey are not then you should clear the left-hand-side first. 
By\ndefault, resized arrays are row-major, unless\n\\code{set\\_array\\_row\\_major\\_order(false)} has been called.
The\nobject is actually of type \\code{ExpressionSize<int Rank>} (where\n\\code{Rank} is the rank of the array), a thin wrapper for a simple\n  \\code{int[Rank]} C-array, although it is rare to need to use it\n  explicitly.\n\\citem{offset()} Returns an object (also of type\n\\code{ExpressionSize<int Rank>}) describing how array indices are\ntranslated into memory offsets.\n\\end{description}\n\nAn \\code{Array} may be filled using the \\code{<<} operator for the\nfirst element followed by either the \\code{<<} or \\code{,} operators\nfor subsequent elements:\n\\begin{lstlisting}\n Vector v(4);\n v << 1 << 2 << 3 << 4; // Fill the four elements of v\n v << 1, 2, 3, 4;       // Same behaviour but easier on the eye\n v << 1, 2, 3, 4, 5;    // Error: v has been overfilled\n Matrix M(2,4);\n M << 1, 2, 3, 4,       // Filling of multi-dimensional arrays\n      5, 6, 7, 8;       // automatically moves on to next dimension\n M << 1, 2, 3, 4,\n      v;                // v treated as a row vector here\n\\end{lstlisting}\nFor multidimensional arrays, elements are filled such that the final\ndimension ticks over fastest (regardless of whether the array uses\nrow-major storage internally), and new rows are started when a row is\ncomplete. Moreover, other arrays can be part of the list of elements,\nprovided that they fit in.  In this context, a rank-1 array is treated\nas a row vector. 
An \\code{index\\_out\\_of\\_bounds} exception is thrown\nif an array is overfilled, while an \\code{empty\\_array} exception is\nthrown if an attempt is made to fill an empty array.\n\n\\cxx11 \\begin{leftbar} If you compile your code with C++11 features\n  enabled then you can use the ``initializer list'' feature to fill\n  arrays using the C-like curly bracket syntax:\n\\begin{lstlisting}\n Vector v;              // Construct an empty vector\n v = {1, 2, 3};         // Resize to length 3 and fill\n Vector w = {1, 2, 3};  // Construct a vector of length 3 and fill\n w = {4.4, 5.5};        // Underfill leads to remaining elements set to zero (as in C)\n w = {6, 7, 8, 9};      // Overfill leads to size_mismatch exception being thrown\n Matrix M = {{1, 2, 3}, // Multi-dimensional arrays use nested curly brackets;\n             {4, 5}};   //  ...underfill again leads to remaining elements set to zero\n\\end{lstlisting}\nAnother convenient property of this syntax is that temporary arrays\nwith explicit values can be used in expressions:\n\\begin{lstlisting}\n v = w * Vector{3.0, 4.2, 5.1};\n\\end{lstlisting}\n\\end{leftbar}\n\nWhen interfacing with other libraries, direct access to the data is\noften required. The \\code{Array} class provides the following member\nfunctions:\n\\begin{description}\n\\citem{data()} Returns a pointer to the first element in the array,\ni.e.\\ the element found by indexing all the dimensions of the array\nwith zero. It is up to the caller to understand the layout of the data\nin memory and not to stray outside.  Remember that an array may be\nstrided and the stride may even be negative so that the data returned\nfrom increasing indices are actually from earlier memory\naddresses. Note that a double-precision active array is not stored as\nan array of \\code{adouble} objects, but as an array of \\code{double}\ndata and a single gradient index for the first element. 
Thus the\npointer returned by \\code{data()} will point to the underlying\ninactive data.  In contexts where the \\code{Array} object is\n\\code{const}, a \\code{const} pointer will be returned. Note that in a\nmulti-dimensional array, successive array dimensions are not\nguaranteed to be contiguous in memory since it is sometimes\nadvantageous for vectorization for \\Adept\\ to pad the rows to an alignment\nboundary. You can use the output of the \\code{offset()} member\nfunction to determine the spacing of the elements in each dimension.\n%\n\\citem{const\\_data()} It is sometimes convenient to specify explicitly\nthat read-only access is required, in which case you can use\n\\code{const\\_data()} to return a \\code{const} pointer to the first\nelement in the array.\n\\end{description} \n\n\\section{Operators and mathematical functions}\n\\label{sec:operators}\nThe operators and mathematical functions listed in section\n\\ref{sec:ad_functionality} have been overloaded so that they work exactly as you\nwould expect. Consider this example:\n\\begin{lstlisting}\n floatVector a(5);      // Inactive single-precision vector\n aVector b(5), c(5);    // Active vectors\n aReal d;               // An active scalar\n // ... other code manipulating a-d ...\n b = 2.0;               // Set all elements of b to a scalar value\n c += 5.0*a + sin(b)/d; // Add the right-hand-side to c\n\\end{lstlisting}\nThe penultimate illustrates that all elements of an \\code{Array} can\nbe set to the same value, although note that this will only work if\nthe array is not in the empty state. The final line illustrates how\nterms with different rank, type and activeness can participate in the\nsame expression. Scalars and arrays can participate in the same\nexpression on the right-hand-side of a statement provided that the\narrays have the same size as the array on the left-hand-side. 
Objects\nof different type (in this case single and double precision) can be\ncombined in a mathematical operation, and the type of that operation\nwill be the larger (higher precision) of the two types. If active and\ninactive objects participate in an expression then the left-hand-side\nmust also be active. Expression templates ensure that no temporary\narrays need to be created to store the output of intermediate parts of\nthe expression.  The functions \\code{max} and \\code{min} behave just\nlike binary operators (such as \\code{+} and \\code{*}) in this regard,\nas shown by the following:\n\\begin{lstlisting}\n c = max(a,b);          // Element-wise comparison of a and b \n c = min(a,3.0);        // Return minimum of each element of a and 3\n\\end{lstlisting}\n\nThe examples so far have floating-point results, but some operators\n(e.g.\\ ``\\code{==}'') and some functions (e.g.\\ \\code{isinf}) take\nfloating-point arguments and return a boolean.  The \\Adept\\ versions\ntake floating-point array expressions as arguments and return\n\\code{bool} expressions of the same rank and size. Finally, the\n\\Adept\\ versions of the operators \\code{!}, \\code{||} and \\code{\\&\\&}\ntake a \\code{bool} expression as arguments and return a \\code{bool}\nexpression of the same size and rank.\n\n\\section{Array slicing}\n\\label{sec:slice}\nThis section concerns the many ways that sub-parts of an \\code{Array}\ncan be extracted to produce an object that can be used as an lvalue;\nthat is, if the object is modified then it will modify part of the\noriginal \\code{Array}. It should be stressed that none of these\nmethods results in any rearrangement of data in memory, so they should\nbe efficient.\n\nThe first way this can be done is via the function-call and\nmember-access operators (i.e.\\ \\code{operator()} and\n\\code{operator[]}, respectively) of the \\code{Array}. 
In the case of\nthe function-call operator, the same number of arguments as the rank\nof the array must be provided, where each argument states how its\ncorresponding dimension should be treated.  The nature of the\nresulting object depends on the type of all of the arguments in a way\nthat is similar to how Fortran arrays behave, although note that\narray indices always start at 0. The four different behaviours are as\nfollows:\n\n\n\\begin{description}\n\\item[Extract single value.] If every argument is an integer scalar or\n  scalar expression, then a reference to a single element of the array\n  will be extracted. If an argument is an integer expression\n  containing \\code{end}, then \\code{end} will be interpretted to be\n  the index to the final element of that dimension (a feature borrowed\n  from Matlab). If the array is active then the returned object will\n  be of a special ``active reference'' type that can be used as an\n  lvalue and ensures that any expressions making use of this element\n  can be differentiated. Now for some examples:\n  \\begin{lstlisting}\n aMatrix A(4,3);\n aReal x = A(1,1);  // Copy element at second row and second column into x\n A(end-1,1) *= 2.0; // Double the element in the penultimate column and 2nd row of A\n A(3) = 4.0;        // Error: number of indices does not match number of dimensions\n  \\end{lstlisting}\n\\item[Extract regular subarray.] If every argument is either (i) an\n  integer scalar or scalar expression, or (ii) a regular range of\n  indices, and there is at least one of (ii), then an \\code{Array}\n  object will be returned of the same type and activeness as the\n  original. However, for each argument of type (i), the rank of the\n  returned array will be one less than that of the original. 
There are\n  three ways to express a regular range of indices: ``\\code{\\_\\_}''\n  represents all indices of a particular dimension, \\code{range(a,b)}\n  represents a contiguous range of indices between \\code{a} and\n  \\code{b} (equivalent to \\code{a:b} in Fortran and Matlab), and\n  \\code{stride(a,b,c)} represents a regular range of indices between\n  \\code{a} and \\code{b} with spacing \\code{c} (equivalent to\n  \\code{a:b:c} in Fortran and \\code{a:c:b} in Matlab). Note that\n  \\code{a}, \\code{b} and \\code{c} may be scalar expressions containing\n  \\code{end}, but \\code{c} must not be zero although it can be\n  negative to indicate a reversed ordering. The rank of the returned\n  array is known at compile time; thus if range arguments are found at\n  run-time to contain only one element (e.g.\\ \\code{range(1,1)}) then\n  the dimension being referred to will be not be removed in the\n  returned array but will remain as a singleton dimension. This\n  behaviour is the same as indexing an array dimension with \\code{1:1}\n  in Fortran. Now for some examples:\n\\begin{lstlisting}\n v(range(1,end-1))           // Subset of vector v that excludes 1st & last points\n A(0,stride(end,0,-1))       // First row of A as a vector treated in reverse order\n A(range(0,0),stride(0,0,1)) // A 1-by-1 matrix containing the first element of A\n\\end{lstlisting}\n\\item[Extract irregular subarray.] If an array is indexed as in either\n  of the two methods above, except that one or more dimensions is\n  instead indexed using a rank-1 \\code{Array} of integers, then the\n  result is a special ``indexed-array'' type that stores how each\n  dimension is indexed.  If it then participates either on the left-\n  or right-hand-side of a mathematical expression then when an element\n  is requested, the indices will be queried to map the request to\n  obtain the correct element from the original array. This is much\n  less efficient than using regular ranges of indices as above. 
It\n  also means that if an indexed array is passed to a function\n  expecting an object of type \\code{Array}, then it will first be\n  converted to an \\code{Array} and any modifications performed within\n  the function will not be passed back to the original array. For\n  example:\n\\begin{lstlisting}\n intVector index(3);\n index << 2, 3, 5;\n Array A(4,4);\n A(0,index) = 2.0; // Set irregularly spaced elements of the first row of A\n\\end{lstlisting}\n\\item[Slice leading dimension.] In C, an element is extracted from a\n  two-dimensional array using \\code{A[i][j]}, and \\code{A[i]} returns\n  a pointer to a single row of \\code{A}, where \\code{i} and \\code{j}\n  are integers. To enable similar functionality, if \\code{A} is an\n  \\Adept\\ matrix then \\code{A[i]} indexes the leading dimension by\n  integer \\code{i} returning an array of rank one less than the\n  original. This is equivalent to \\code{A(i,\\_\\_)}. Furthermore,\n  \\code{A[i][j]} will return an individual element as in C, but it\n  should be stressed that \\code{A(i,j)} is more efficient since it\n  does not involve the creation of intermediate arrays.\n\\end{description}\n%\nThere are a few other ways to produce lvalues that consist of a subset\nor a reordering of an array. They are implemented as member functions\nof the \\code{Array} class, in order to distinguish from non-member\nfunctions that produce a copy of the data and therefore cannot be\nusefully used as lvalues.  For example, \\code{A.T()} and\n\\code{transpose(A)} both return the transpose of matrix \\code{A}, but\nthe former is faster since it does not make a copy of the original\ndata, while the latter is more flexible since it can be applied to\narray expressions (e.g.\\ \\code{transpose(A*B)}).  The member functions\navailable are:\n\\begin{description}\n\\citem{subset(int ibegin0, int iend0, ...)} This function returns a\ncontiguous subset of an array as an array of the same rank that points\nto the original data. 
It takes twice as many arguments as the array\nhas dimensions, with each pair of arguments representing the indices\nto the first and last element to include from a particular\ndimension. Exactly the same result can be obtained using \\code{range}\nbut the \\code{subset} form is more concise. For example, for a matrix\n\\code{M}, \\code{M.subset(1,5,3,10)} is equivalent to\n\\code{M(range(1,5),range(3,10))}.\n%\n\\citem{T()} This function returns the transpose of a rank-2 array (a\nmatrix). The returned array points to the same data but with its\ndimensions reversed. A compile-time error occurs if this function is\nused on an array with rank other than 2.  Currently \\Adept\\ doesn't\nallow the transpose of a rank-1 array (a vector), since vectors are\nnot intended to have an intrinsic orientation.  When orientation\nmatters, such as in matrix multiplication, the intended orientation\nmay be inferred from the context or specified explicitly.\n%\n\\citem{permute(int i0, int i1, ...)} This function is the\ngeneralization of the transpose for multi-dimensional arrays: it\nreturns an array of the same rank as the original but with the\ndimensions rearranged according to the arguments. There must be the\nsame number of arguments as there are dimensions, and each dimension\n(starting at 0) must be provided once only. 
The returned array is\nlinked to the original; the permutation is achieved simply by\nrearranging the list of dimensions and the list of ``offsets'' (the\nseparation in memory of elements along each dimension individually).\n%\n\\citem{diag\\_matrix()} When this function is applied to a rank-1\n\\code{Array} of length $n$, it returns an $n$-by-$n$ diagonal matrix\n(specifically a \\code{DiagMatrix}; see section \\ref{sec:square}) that\npoints to the data from the rank-1 array along its diagonal.\n%\n\\citem{diag\\_vector()} When this function is applied to a rank-2\n\\code{Array} with equally sized dimensions, it returns a rank-1 array\npointing to the data along its diagonals.  An\n\\code{invalid\\_operation} exception is thrown if applied to a\nnon-square matrix, and a compile-time error if applied to an array of\nrank other than 2.\n%\n\\citem{diag\\_vector(int i)} When applied to a square rank-2 $n$-by-$n$\n\\code{Array}, this returns a rank-1 array of length\n$n-\\mathrm{abs}(i)$ pointing to the $i$th superdiagonal of the square\nmatrix, or the $-i$th subdiagonal if $i$ is negative. An\n\\code{invalid\\_exception} exception occurs if applied to a non-square\nmatrix, and a compile-time error if applied to an array of rank other\nthan 2.\n%\n\\citem{submatrix\\_on\\_diagonal(int ibegin,int iend)} When applied to a\nsquare rank-2 array, this function returns a square matrix that shares\npart of the diagonal of the original matrix.  Thus\n\\code{A.submatrix\\_on\\_diagonal(int ibegin,int iend)} is equivalent to\n\\code{A(range(ibegin,iend),range(ibegin,iend))}. Its purpose is to\nprovide a subsetting facility for symmetric, triangular and\nband-diagonal matrices (see section \\ref{sec:square}) for which\ngeneral array indexing is not available. 
If applied to a non-square\nmatrix, an \\code{invalid\\_operation} exception will be thrown.\n%\\citem{upper\\_matrix()}\n%\\citem{lower\\_matrix()}\n%\\citem{band\\_matrix<LDiag,UDiag>()}\n\\citem{reshape(int i0, int i1...)} Only applicable to an \\code{Array}\nof rank 1, this returns a multi-dimensional array whose dimensions are\ngiven by the arguments to the function.  Between 2 and 7 dimensions\nare possible. If the arguments are such that the total size of the\nreturned array would not match the length of the vector, an\n\\code{invalid\\_dimension} exception is thrown.\n\\end{description}\n\n\\section{Passing arrays to and from functions}\n\\label{sec:passing}\nWhen writing functions taking array arguments, there are three\ndifferent ways to do it depending on the extent to which the function\nneeds to be able to modify the array.  In the case of constant array\narguments, a constant reference should be used; for example:\n\\begin{lstlisting}\n Real l3norm(const Vector& v) {     // Function returning the L3-norm of a vector\n   return cbrt(sum(v*v*v));\n }\n Vector w(3); w << 1.0, 2.0, 3.0;   // Create a test vector\n Real ans1 = l3norm(w);             // Named vector argument\n Real ans2 = l3norm(w(range(0,1))); // Temporary vector argument\n Real ans3 = l3norm(2.0*w);         // Expression implicitly converted to temporary vector\n\\end{lstlisting}\nThis function works with all three types of argument.  
The last\nexample illustrates that when an inactive rank-1 expression is passed\nto the function, it is evaluated and the result placed in a temporary\nvector that is passed to the function.\n\nAt the other extreme, we may wish to create a function that modifies\nan array argument, including the possibility of changing its size; for\nexample:\n\\begin{lstlisting}\n void resize_and_zero(int n, Vector& v) { // A rather pointless function...\n   v.resize(n); v = 0.0;\n }\n Vector w(4);\n resize_and_zero(2,w);                    // Results in w={0.0, 0.0}\n resize_and_zero(2,w(range(0,2)));        // Compile error: argument is temporary\n resize_and_zero(2,2.0*w);                // Compile error: argument is not an lvalue\n\\end{lstlisting}\nIn this case, due to the C++ rule that a non-constant reference cannot\nbind to a temporary object, the function can only take a\n\\emph{non-temporary} \\code{Vector} as an argument.  This is fair\nenough; it would not make sense to resize the subset of an array, or\nan expression. However, it is very common to want to pass a subset of\nan array to a function and for the function to modify the values of\nthe array, but not to resize it. In \\Adept\\ this is achieved as\nfollows:\n\\begin{lstlisting}\n void square_in_place(Vector v) {\n   v *= v;\n }\n Vector w(3); w << 2.0, 3.0, 5.0;\n square_in_place(w);              // Results in w={4.0, 9.0, 25.0}\n square_in_place(w(range(0,1)));  // Results in w={4.0, 9.0, 5.0}\n square_in_place(2.0*w);          // No effect on w\n\\end{lstlisting}\nEven though the \\code{Vector} has been passed by value, the\n\\code{Vector} copy constructor performs a ``shallow copy'', which\nmeans that little more than the array dimensions and a pointer to the\ndata are copied. Therefore, in the first two examples above the vector\n\\code{v} inside the function points to data in \\code{w}, and can\ntherefore modify \\code{w}.  
By contrast, when an expression is passed\nto the function, a new \\code{Vector} is created to hold the result of\nthe expression, and when this is modified inside the function it does\nnot affect the data in the calling routine.\n\nThe fact that \\code{Array} copy constructors perform shallow copies\nalso improves the efficiency of functions that return arrays such as\nthe following:\n\\begin{lstlisting}\n Matrix square(const Matrix& in) {\n   Matrix out = in*in; // Create an matrix containing the result of in*in\n   return out;  \n }\n Matrix A(100,100);    // Allocate memory for \"A\"\n Matrix B = square(A); // Copy constructor: shallow copy of \"out\" into \"B\"\n\\end{lstlisting}\nAt the \\code{return} statement, matrix \\code{out} is received by the\ncopy constructor of matrix \\code{B}, so a shallow copy is\nperformed. This means that the description of matrix \\code{out} is\ncopied to \\code{B}, including a pointer to \\code{Storage} object\ncontaining both the data and a count of the number of references to\nit; this counter is increased by one. Matrix \\code{out} is then\ndestructed, and the counter is immediately reduced by one. The net\nresult is that \\code{B} has ``stolen'' the data in the matrix from\n\\code{out} without it having been copied, thus avoiding unnecessary\nallocation of memory on the heap followed by copying and deallocation. \n\nThe shallow-copy implementation leads to behaviour that users may not\nbe expecting. If an array is initialized from another array in either\nof the following two ways:\n\\begin{lstlisting}\n Matrix M(3,4);\n Matrix A(M);   // Call copy constructor\n Matrix B = M;  // Call copy constructor\n\\end{lstlisting}\nthen the result is that \\code{A}, \\code{B} and \\code{M} share the same\ndata, rather than a copy being made.  
To make a deep copy, it is\nnecessary to do the following:\n\\begin{lstlisting}\n Matrix M(3,4);\n Matrix A;      // Create empty matrix\n A = M;         // Call assignment operator for deep copy\n\\end{lstlisting}\nThis is annoying, but the alternative is that there would be no clean\nway to pass a subset of an array to a function that then modifies its\nvalues. The same behaviour is implemented in the Blitz++ array class\n\\cite[]{Veldhuizen1995}.\n\nIt should be noted that with the introduction of ``move semantics'' in\nthe C++11 standard, the it is possible to detect when an array\nreturned from a function is about to be destructed, and therefore\ninvoke a move constructor that implements a shallow copy. This negates\none of the two reasons from making the copy constructor execute only a\nshallow copy.  But it does not help in passing array subsets to\nfunctions, unless two versions of every function were created, one\naccepting an lvalue reference (\\code{Array\\&}) and the other accepting\nan rvalue reference (\\code{Array\\&\\&}), which is hardly practical.\n\n\\cxx11 \\begin{leftbar}If you compile your code with C++11 features\n  enabled then move semantics can sometimes make assignment more\n  efficient. Consider code calling the \\code{square} function above:\n\\begin{lstlisting}\n Matrix A(10,10), B(10,10);\n B = square(A); // Move assignment operator performs shallow copy\n Matrix C(B);   // B and C now share the same data\n B = square(A); // Move assignment operator performs deep copy\n\\end{lstlisting}\n  Both assignments are to temporary objects about to be destructed, so\n  the move assignment operator is called. This operator checks how\n  many references there are to the data in \\code{B}. In the first case\n  there is only one reference, so the data in \\code{B} can safely be\n  discarded and a shallow copy (a ``move'') of the data in the\n  temporary is performed. 
In the second case there are two references,\n  so a deep copy must be performed in order that \\code{C} sees the\n  change in \\code{B}.\n\\end{leftbar}\n\n\\section{Array reduction operations}\n\\label{sec:reduce}\nA family of functions return a result that is reduced in rank compared\nto their argument, and operate in the same way as Fortran functions of\nthe same name.  Consider the \\code{sum} function, which can be used\neither to sum all the elements in an array expression and return a\nscalar, or to sum elements along the dimension specified in the second\nargument and return an array whose rank is one less than the first\nargument:\n\\begin{lstlisting}\n Array A(3,4);\n Real x = sum(A);     // Sum all elements of matrix A\n Vector v = sum(A,1); // Sum along the row dimension returning a vector of length 3\n\\end{lstlisting}\nFunctions that are used in the same way are \\code{mean},\n\\code{product}, \\code{minval}, \\code{maxval} and \\code{norm2} (the\nsquare-root of the sum of the squares of each element).  Note the\ndifference between \\code{maxval} and \\code{max}: the behaviour of\n\\code{max} is outlined in section \\ref{sec:operators}. Three further\nfunctions operate in the same way but on boolean arrays: \\code{all}\nreturns \\code{true} only if all elements are \\code{true}, \\code{any}\nreturns \\code{true} if any element is \\code{true} (and \\code{false}\notherwise), while \\code{count} returns the number of \\code{true}\nelements.  Each of these can work on an individual dimension as with\n\\code{sum} and friends.\n\nA further function, \\code{dot\\_product(a,b)}, takes two arguments that\nmust be rank-1 arrays of the same length and returns the dot\nproduct. 
This is essentially the same as \code{sum(a*b)}.\n\n\section{Array expansion operations}\n\label{sec:expand}\nThe function \code{outer\_product(x,y)} returns the outer product of\ntwo rank-1 expressions; if ${\bf x}$ and ${\bf y}$ are interpreted as\ncolumn vectors then ${\bf xy}^T$ is returned. If \code{outer\_product}\nis used in an expression then an intermediate matrix object is not\ncreated to store it.\n\nThe function \code{spread<dim>(A,n)} returns an array that replicates\nthe \code{A} array \code{n} times along dimension \code{dim}. The\nreturned array has a rank one larger than \code{A} whose dimension\n\code{dim} is \code{n} and the remaining dimensions are the same as\nthose of \code{A}. It is essentially the same as the Fortran function\nof the same name, but \code{dim} is provided as a template argument\nsince performance is improved if this is known at compile time.  The\nfollowing illustrates \code{spread} for an argument of rank 1:\n\begin{lstlisting}\n Vector v(3); v << 1, 2, 3;\n Matrix M0 = spread<0>(v,2);\n // M0 contains {{1, 2, 3},\n //              {1, 2, 3}}\n Matrix M1 = spread<1>(v,2);\n // M1 contains {{1, 1},\n //              {2, 2},\n //              {3, 3}}\n\end{lstlisting}\nNote that \code{spread<1>(x,y.size())*spread<0>(y,x.size())} gives the\nsame result as \code{outer\_product(x,y)}.\n\n\section{Conditional operations}\n\label{sec:conditional}\nThere are two main ways to perform an operation on an array depending\non the result of a boolean expression. The first is similar to the\nFortran \code{where} construct:\n\begin{lstlisting}\n Array A(3,4);\n Array B(3,4);\n A.where(B > 0.0)   = 2.0 * B;            // Only assign to A if B > 0\n A.where(!isnan(B)) = either_or(-B, 0.0); // Read from either one expression or the other\n\end{lstlisting}\nIn the first example, \code{A} is only assigned if a condition is met,\nand therefore \code{A} must be of the same size and rank as the\nboolean expression. 
In the second example \code{A} is filled with\nelements from the first argument of \code{either\_or} if the boolean\nexpression is \code{true}, or from the second argument otherwise; if\n\code{A} is empty then it will be resized to the size of the boolean\nexpression. In both cases, the expressions on the right-hand-side may\nbe scalars or array expressions of the same size as the boolean\nexpression.  Equivalent expressions are possible replacing the\nassignment operator with the \code{+=}, \code{-=}, \code{*=} and\n\code{/=} operators, in which case \code{A} must already be the same\nsize as the boolean expression.\n\nAn alternative approach that works only with vectors uses the\n\code{find} function. This is similar to the equivalent Matlab\nfunction and returns an \code{IndexVector} (a vector of integers of\nsufficient precision to index an array) containing indices to the\n\code{true} elements of the vector:\n\begin{lstlisting}\n Vector v(10), w(10);\n v(find(v > 5.0)) = 3.0;\n IndexVector index = find(v > 5.0);\n v(index) = 2.0 * w(index);\n\end{lstlisting}\nThis will work if no \code{true} elements are found: \code{find} will\nreturn an empty array, and when \code{v} is indexed by an empty\nvector, no action will be taken.  In general, \code{find} is less\nefficient than \code{where}.\n\n\section{Fixed-size arrays}\n\label{sec:fixed}\nThe size of the \code{Array} class is dynamic, which is somewhat\nsub-optimal for small arrays whose dimensions are known at compile\ntime. \Adept\ provides an alternative class template for an array\nwhose size is known at compile time and whose data are stored on the\nstack. 
It has the following declaration:\n\\begin{lstlisting}\n namespace adept {\n   template <typename Type, bool IsActive, int Dim0, int Dim1 = 0, ...>\n   class FixedArray;\n }\n\\end{lstlisting}\nThe type (e.g.\\ \\code{double}) and activeness are specified by the\nfirst two template arguments, while the remaining template arguments\nprovide the size of the dimensions, up to 7.  Only as many sizes need\nto be specified as there are dimensions.  A user working with arrays\nof a particular size could use \\code{typedef} to provide convenient\nnames; for example:\n\\begin{lstlisting}\n typedef FixedArray<double,false,4>   Vector4;\n typedef FixedArray<double,false,4,4> Matrix44;\n typedef FixedArray<double,true,4>    aVector4;\n typedef FixedArray<double,true,4,4>  aMatrix44;\n\\end{lstlisting}\nIn the \\code{adept} namespace, \\Adept\\ defines \\code{Vector2},\n\\code{Vector3}, \\code{Matrix22}, \\code{Matrix33} and their active\ncounterparts.\n\nFixed arrays have all the same capabilities as dynamic arrays, with a\nfew exceptions:\n\\begin{itemize}\n\\item Since their size is fixed, there are no member functions\n  \\code{resize}, \\code{clear} or \\code{in\\_place\\_transpose}.\n\\item Since for the lifetime of the object it is associated with data\n  on the stack, it cannot link to other data.  This means that there\n  is no member function \\code{link}, and also if it is passed by value\n  to a function then the contents of the array will be copied, rather\n  than the behaviour of the \\code{Array} class where the receiving\n  function links to the original data.\n\\end{itemize}\nAll the same slicing operations are available as discussed in section\n\\ref{sec:slice}, and they return the same types when applied to fixed\narrays as they do when applied to dynamic arrays.  
Thus most\noperations return an \\code{Array} object that links to a subset of the\ndata within the \\code{FixedArray} object.\n\n\\section{Special square matrices}\n\\label{sec:square}\n\\Adept\\ offers several special types of square matrix that can\nparticipate in array expressions.  They are more efficient than\n\\code{Array}s in certain operations such as matrix multiplication and\nassignment, but less efficient in operations such as accessing\nindividual elements. All use an internal storage scheme compatible\nwith BLAS (Basic Linear Algebra Subprograms).  All are specializations\nof the \\code{SpecialMatrix} class template, which has the following\ndeclaration:\n\\begin{lstlisting}\n namespace adept {\n   template <typename Type, class Engine, bool IsActive = false>\n   class SpecialMatrix;\n }\n\\end{lstlisting}\nThe first template argument is the numerical type, the second provides\nthe functionality specific to the type of matrix being simulated, and\nthe third states whether the matrix participates in the\ndifferentiation of an algorithm. The specific types of special matrix\nare as follows:\n\\begin{description}\n\\item[Square matrices.] \\code{SquareMatrix} provides a dense square\n  matrix of type \\code{Real} with \\code{aSquareMatrix} its active\n  counterpart. Its functionality is similar to a rank-2 \\code{Array},\n  except that its dimensions are always equal and the data along its\n  fastest varying dimension are always contiguous in memory, which may\n  make it faster than \\code{Array} in some instances.\n\\item[Symmetric matrices.] \\code{SymmMatrix} provides a symmetric\n  matrix of type \\code{Real}, and \\code{aSymmMatrix} is its active\n  equivalent. Internally this type uses row-major unpacked storage\n  with the data held in the lower triangle of the array and zeros in\n  the upper triangle (equivalent to column-major storage with data in\n  the upper triangle). 
If the opposite configuration is required then\n  it is available by specifying different template arguments to the\n  \code{SpecialMatrix} class template.  Note that with normal access\n  methods, the storage scheme is opaque to the user; for example,\n  \code{S(1,2)=2.0} and \code{S(2,1)=2.0} have the same effect.\n\item[Triangular matrices.] \code{LowerMatrix} and \code{UpperMatrix}\n  (and their active equivalents prefixed by ``\code{a}'') provide\n  triangular matrices of type \code{Real}. Internally they use\n  row-major unpacked storage, although column-major storage is\n  available by specifying different template arguments to the\n  \code{SpecialMatrix} class template.\n\item[Band diagonal matrices.] \code{DiagMatrix}, \code{TridiagMatrix}\n  and \code{PentadiagMatrix} provide diagonal, tridiagonal and\n  pentadiagonal \code{Real} matrices, respectively (with their active\n  equivalents prefixed by ``\code{a}''). Internally, row-major\n  BLAS-type band storage is used such that an $n$-by-$n$ tridiagonal\n  matrix stores $3n$ rather than $n^2$ elements. \Adept\ supports\n  arbitrary numbers of sub-diagonals and super-diagonals, accessible\n  by specifying different template arguments to the\n  \code{SpecialMatrix} class template.\n\end{description}\nA \code{SpecialMatrix} can be constructed and resized as for\n\code{Array}s (see section \ref{sec:array}), with the following\nadditions:\n\begin{lstlisting}\n SymmMatrix S(4);  // Initialize a 4-by-4 symmetric matrix\n S.resize(5);      // Resize to a 5-by-5 matrix\n\end{lstlisting}\nThese are applicable to all types of \code{SpecialMatrix}.\n\nIn terms of array indexing and slicing, the member functions \code{T},\n\code{diag} and \code{diag\_submatrix} described in section\n\ref{sec:slice} are all available, but if you index a\n\code{SpecialMatrix} with \code{S(a,b)} then \code{a} and \code{b} must\nbe scalars or scalar expressions. 
For triangular or band-diagonal\nmatrices, if the requested element is one of the zero parts of the\nmatrix then it can only be used as an rvalue in an expression. If you\nwish to extract arbitrary subarrays from a \\code{SpecialMatrix} then it\nmust first be converted to a \\code{Matrix}:\n\\begin{lstlisting}\n SymmMatrix S(6);\n intVector index(3);\n index << 2, 3, 5;\n Matrix M = Matrix(S)(index,stride(0,4,2));\n\\end{lstlisting}\n\n\n\\section{Matrix multiplication}\n\\label{sec:matmul}\nMatrix multiplication may be invoked in two equivalent ways: using the\n\\code{matmul} function or the ``\\code{**}'' pseudo-operator. Following\nFortran, the two arguments may be either rank-1 or rank-2, but at\nleast one argument must be of rank-2. The orientation of any rank-1\nargument is inferred from whether it is the first or second argument,\nas shown here:\n\\begin{lstlisting}\n Matrix A(3,5), B(5,3), C;\n Vector v(5), w;\n C = matmul(A,B); // Matrix-matrix multiplication: return a 3x3 matrix\n w = matmul(v,B); // Interpret v as a row vector: return a vector of length 3\n w = matmul(A,v); // Interpret v as a column vector: return a vector of length 3\n\\end{lstlisting}\nIn this way it is never necessary to transpose a vector; the\nappropriate orientation to use is inferred from the context.  
You may\nfind it clearer to use ``\\code{**}'' for matrix multiplication as\nillustrated here:\\footnote{A drawback of the \\code{**} interface with\n  the orientation of vector arguments being inferred is that in an\n  expression like \\code{A**v**B} (where \\code{A} and \\code{B} are\n  matrices and \\code{v} is a vector), \\code{v} is interpreted as a\n  column vector in \\code{A**v}, which returns a column vector result,\n  but this result is then implicitly transposed when it is used as the\n  left-hand argument of the matrix multiplication with \\code{B}.\n  Moreover, the order of precedence affects the result, since this\n  expression will not give the same answer as \\code{A**(v**B)}.\n % I may\n % consider introducing additional constraints and features in future\n % versions to require users to more explicitly state what they mean in\n % such situations, to reduce the chance of accidental mistakes.\n}\n\\begin{lstlisting}\n Matrix A(3,5), B;\n SymmMatrix S(5);                // 5-by-5 symmetric matrix\n Vector c, x(5);\n c = A **  log(S) ** x;          // Returns a vector of length 3\n c = matmul(matmul(A,log(S)),x); // Equivalent to the previous line but using matmul\n c = A ** (log(S) ** x);         // As the previous example but more efficient\n B = 2.0 * S ** A.T();           // Returns a 5-by-3 matrix\n B = 2.0 * S ** A;               // Run-time error: inner dimensions don't match\n\\end{lstlisting}\nThe ``\\code{**}'' pseudo-operator has been implemented in \\Adept\\ by\noverloading the dereference operator such that ``\\code{*A}'' returns a\nspecial type when applied to array expressions, and overloading the\nmultiply operator to perform matrix multiplication when one of these\ntypes is on the right-hand-side. This means that \\code{**} has the\nsame precedence as ordinary multiplication, and both will be applied\nin order of left to right.  
Thus, in the first example above,\nmatrix-matrix multiplication is performed followed by matrix-vector\nmultiplication. The second example shows how to make this more\nefficient with parentheses to specify that the rightmost matrix\nmultiplication should be applied first, leading to two matrix-vector\nmultiplications.  The final example shows an expression that would\nfail at runtime with an \\code{inner\\_dimension\\_mismatch} exception\ndue to the matrix multiplication being applied to matrices whose inner\ndimensions do not match.\n\nYou cannot use \\code{matmul} or ``\\code{**}'' for vector-vector\nmultiplication, since it is ambiguous whether you require the inner\nproduct (dot product) or the outer product. Therefore you must\nexplicitly call the function \\code{dot\\_product} (section\n\\ref{sec:reduce}) or \\code{outer\\_product} (section \\ref{sec:expand}).\n\nIn order to get the best performance, \\Adept\\ does not use expression\ntemplates for matrix multiplication but rather calls the appropriate\nlevel-2 BLAS function for matrix-vector multiplication and level-3\nBLAS function for matrix-matrix multiplication. For matrix\nmultiplication involving active vectors and matrices, \\Adept\\ first\nuses BLAS to perform the matrix multiplication and then stores the\nequivalent differential statements. There are therefore a few factors\nthat users should be aware of in order to get the best performance:\n\\begin{itemize}\n\\item If an array expression rather than an array is provided as an\n  argument to matrix multiplication, it will first be converted to an\n  \\code{Array} of the same rank. 
Therefore, if the same expression is\n  used more than once in a sequence of matrix multiplications, better\n  performance will be obtained by precomputing the array expression\n  and storing it in a temporary matrix:\n\begin{lstlisting}\n Matrix A(5,5), B(5,5), C(5,5), D(5,5);\n // Slow implementation:\n C = transpose(2.0*A*B) ** (2.0*A*B);\n D = (2.0*A*B) ** C;\n // Faster implementation:\n {\n   Matrix tmp = 2.0*A*B;\n   C = tmp.T() ** tmp;\n   D = tmp ** C;\n } // \"tmp\" goes out of scope here\n\end{lstlisting}\n\item If the left-hand argument of a matrix multiplication is a\n  symmetric, triangular or band matrix then a specialist BLAS function\n  will be used that is faster than the one for general dense\n  matrices. \Adept\ may not be able to tell if the result of an array\n  expression is symmetric, triangular or has a band structure, and so\n  may not call the most efficient BLAS function. The user can help as\n  follows:\n\begin{lstlisting}\n SymmMatrix S(5);\n Matrix A(5,5), B(5,5);\n B = (2.0*exp(S)) ** A;           // Slower\n B = SymmMatrix(2.0*exp(S)) ** A; // Faster\n\end{lstlisting}\n\item BLAS requires that the fastest-varying dimension of input\n  matrices are contiguous and increasing. This is always the case for\n  the special square matrices described in section \ref{sec:square},\n  but not necessarily for a \code{Matrix} or an \code{aMatrix}, which are\n  particular cases of the general \code{Array} type. 
If the\n  fastest-varying dimension of such a matrix is not contiguous and\n  increasing then \Adept\ will copy it to a temporary matrix before\n  invoking matrix multiplications, as in the following example:\n\begin{lstlisting}\n Matrix A(5,5), B, C(5,5);\n B.link(A(__, stride(end,1,-1))); // Fastest varying dim is contiguous but decreasing\n C = A ** A; // Matrix multiplication applied directly with A\n C = B ** B; // Adept will copy B to a temporary matrix before multiplication\n\end{lstlisting}\n\end{itemize}\n\nAn additional member function to mention in this section is\n\code{in\_place\_transpose()}, which is only applicable to\nmatrices. It transposes the matrix by swapping the dimensions and the\noffsets to each dimension, but leaving the actual data untouched.\nThis means that a matrix with row-major storage will be changed to\ncolumn-major, and vice versa.\n\n\Adept\ can differentiate expressions involving matrix multiplication,\nbut this is far from optimal in \Adept\ version 2.0, for two\nreasons. Firstly, only differentiation of dense matrices has been\nimplemented, so when matrix multiplication is applied to active\n``special matrices'' (symmetric, band, upper-triangular and\nlower-triangular matrices), they are first copied to a dense\nmatrix. Secondly, the \Adept\ stack format can currently only store\ndifferential statements for scalar expressions, which for matrix\nmultiplication leads to lots of repeated values on the stack. 
A future\nversion of \\Adept\\ will redesign the stack to allow matrices to be\nstored in it; this will be much faster and much less memory-hungry.\n\n\\section{Linear algebra}\n\\label{sec:la}\n\\Adept\\ provides the functions \\code{solve} and \\code{inv} to solve\nsystems of linear equations and to invert a matrix, respectively,\nwhich themselves call the most appropriate function from\nLAPACK.\n\\begin{lstlisting}\n Matrix A(5,5), Ainv(5,5), X(5,5), B(5,5);\n SymmMatrix S(5), Sinv(5);\n Vector x(5), b(5);\n Ainv = inv(A);     // Invert general square matrices using LU decomposition\n Sinv = inv(S);     // Invert symmetric matrices using Cholesky decomposition\n x = solve(A,b);    // Solve general system of linear equations\n X = solve(S,B);    // Solve symmetric system of linear equations with matrix right-hand-side\n\\end{lstlisting}\n\\iffalse\nAs for matrix multiplication described in section \\ref{sec:matmul}, if\nthe arguments to \\code{solve} and \\code{inv} are not matrices with\nfastest-varying dimensions that are contiguous and increasing, then\n\\Adept\\ will first convert them to temporary matrices before\nperforming the operation.\n\\fi\n\nStatements involving \\code{solve} and \\code{inv} cannot yet be\nautomatically differentiated. When the \\Adept\\ stack is redesigned to\nhold matrices, this capability will be added.\n\n\\section{Interpolation}\n\\emph{Adept} supports linear and nearest-neighbour interpolation, in\none, two and three dimensions via the \\code{interp}, \\code{interp2d}\nand \\code{interp3d} functions. 
The example below shows how these\nfunctions are called and the size of the arguments, but does not fill\nthe arguments with actual data (see the test program\n\\code{test/test\\_interp.cpp} for complete usage):\n%\n\\begin{lstlisting}\n // Size of each dimension\n int nx, ny, nz;\n // Coordinate vectors of each dimension (must be monotonic)\n Vector x(nx), y(ny), z(nz);\n // Arrays to be interpolated\n Vector  A1(nx);\n Matrix  A2(ny,nx);\n Array3D A3(nz,ny,nx);\n // Number of points required\n int ni;\n // Locations of these points\n Vector xi(ni), yi(ni), zi(ni);\n // Output vector\n Vector v(ni);\n // Linear interpolation (default)\n v = interp(x,A1,xi);\n v = interp(x,A1,xi,ADEPT_INTERPOLATE_LINEAR); // Specifying scheme explicitly\n v = interp2d(y,x,A2,yi,xi);\n v = interp3d(z,y,x,A3,zi,yi,xi);\n // Nearest-neighbour interpolation\n v = interp(x,A1,xi,ADEPT_INTERPOLATE_NEAREST);\n v = interp2d(y,x,A2,yi,xi,ADEPT_INTERPOLATE_NEAREST);\n v = interp3d(z,y,x,A3,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST);\n\\end{lstlisting}\n%\nEach interpolation function takes coordinate vectors describing each\ndimension of the interpolation array in the order of the dimensions of\nthat array. In the two dimensional case, since matrices are indexed\nfirst by row ($y$ axis) then column ($x$ axis), this is the order they\nare shown here.\n\nThe interpolation arrays (\\code{A1}, \\code{A2} and \\code{A3} here) may\nhave more dimensions than shown above; for each additional dimension, a\nfurther dimension is added to the output array, and effectively\nmultiple arrays are interpolated at once. 
In this case, the coordinate\nvectors still refer to the first one, two or three dimensions of this\narray and the remaining (more rapidly varying in memory) dimensions\ncome after.\n\nAs can be seen from the listing above, an optional argument after the\narray arguments specifies the interpolation scheme to use, but this\nargument can also be used to specify the extrapolation policy to apply\nfor requested points that lie outside of the interpolation array by\nusing a bitwise-OR with one of the following:\n%\n\\begin{description}\n  \\citem{ADEPT\\_EXTRAPOLATE\\_DEFAULT} Use the default extrapolation\n  policy associated with the interpolation scheme (see\n  below). Obviously this can be omitted.\n  \\citem{ADEPT\\_EXTRAPOLATE\\_LINEAR} Linear extrapolation; this is the\n  default for linear interpolation, but is not available with\n  nearest-neighbour interpolation.  \\citem{ADEPT\\_EXTRAPOLATE\\_CLAMP}\n  Clamp the returned value at the nearest valid point in the\n  interpolation array; this is the default for nearest-neighbour\n  interpolation.  
\citem{ADEPT\_EXTRAPOLATE\_CONSTANT} Set outliers to\n  a constant value provided by a further optional argument to the\n  function, or \code{NaN} if no additional argument is provided.\n\end{description}\nFor example:\n\begin{lstlisting}\n // Explicit selection of default behaviour (linear interpolation & extrapolation)\n v = interp(x,A1,xi,ADEPT_INTERPOLATE_LINEAR|ADEPT_EXTRAPOLATE_DEFAULT);\n // Nearest-neighbour interpolation with clamped extrapolation\n v = interp(x,A1,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP);\n // Nearest-neighbour interpolation, outliers set to NaN\n v = interp(x,A1,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT);\n // Linear interpolation, outliers set to zero\n v = interp(x,A1,xi,ADEPT_EXTRAPOLATE_CONSTANT, 0.0);\n\end{lstlisting}\n\n\section{Bounds and alias checking}\n\label{sec:bounds}\nWhen encountering an array or active expression, \Adept\ performs\nseveral checks to test the validity of the expression both at compile\ntime and at runtime:\n\begin{description}\n\item[Activeness check.] An expression in which an active expression\n  is assigned to an inactive array will fail to compile.\n\item[Rank check.] An expression will fail to compile if the rank of\n  the array on the left-hand-side of the ``\code{=}'' operator (or the\n  operators ``\code{+=}'', ``\code{*=}'', etc.) does not match the\n  rank of the array expression on the right-hand-side. However, a\n  scalar (rank-0) expression can be assigned to an array of any rank;\n  its value will be assigned to all elements of the\n  array. Compile-time rank checks are also performed for each binary\n  operation (binary operators such as ``\code{+}'' and binary\n  functions such as \code{pow}) making up an array expression:\n  compilation will fail if the two arguments do not have the same rank\n  and neither is of rank 0.\n\item[Dimension check.] 
When a binary operation is applied to two\n  array expressions of rank $n$ then \\Adept\\ checks at run-time that\n  each of the $n$ dimensions has the same length. Otherwise, a\n  \\code{size\\_mismatch} exception is thrown.\n\\item[Alias check.] By default, \\Adept\\ checks to see whether the memory\n  referenced in the array object on the left-hand-side of a statement\n  overlaps with the memory referenced by any of the objects on the\n  right-hand-side, as in this example of a shift-right operation:\n\\begin{lstlisting}\n Vector v(6);\n v(range(1,end)) = v(range(0,end-1));\n\\end{lstlisting}\n  In order to prevent the right-hand-side changing during the\n  operation, \\Adept\\ copies the expression on the right-hand-side to a\n  temporary array and then assigns the left-hand-side array to this\n  temporary, which is equivalent to the following:\n\\begin{lstlisting}\n {\n   Vector tmp;\n   tmp = v(range(0,end-1));\n   v(range(1,end)) = tmp;\n } // tmp goes out of scope here\n\\end{lstlisting}\n  However, for speed \\Adept\\ does not check to see whether individual\n  memory locations are shared; rather the start and end memory\n  locations are checked to see if they overlap. This means that for\n  certain strided operations, copying to a temporary array is\n  unnecessary.  Nor is it necessary if elements of an array will be\n  accessed in exactly the same order on the left-hand-side as the\n  right-hand-side. 
If the user is sure that alias checking is not\n  necessary then he or she can override alias checking for part or all\n  of an array expression using the \code{noalias} function, as\n  follows:\n\begin{lstlisting}\n v(stride(1,end,2)) = noalias(v(stride(0,end-1,2))); // No overlap between RHS and LHS\n v = 1.0 + noalias(exp(v));                          // LHS & RHS accessed in same order \n\end{lstlisting}\n  Note that for speed, alias checking is not performed if the\n  left-hand-side is a \code{FixedArray}, since such arrays can never\n  point to another location and therefore aliasing is less likely to\n  arise. Aliasing is still possible if one of the terms on the\n  right-hand-side points to the data in the \code{FixedArray} on the\n  left. In this case, you can use the \code{eval} function, which\n  takes a non-scalar expression as an argument, and returns an array\n  containing a copy of the data. For example:\n\begin{lstlisting}\n FixedArray<Real,false,3> v = {1.0, 2.0, 3.0}; // C++11 initialization of inactive vector\n v = v(stride(end,0,-1));                      // Aliasing leads to v = {3.0, 2.0, 3.0}\n v = eval(v(stride(end,0,-1)));                // Expected result:  v = {3.0, 2.0, 1.0}\n\end{lstlisting}\n  To avoid the overhead of alias checking, you can define the\n  preprocessor variable \code{ADEPT\_NO\_ALIAS\_CHECKING}, but then it\n  is up to the user to identify the statements where aliasing will\n  occur and use the \code{eval} function to ensure the correct\n  behaviour.\n\item[Bounds check.] If the preprocessor variable\n  \code{ADEPT\_BOUNDS\_CHECKING} is defined then additional run-time\n  checks will be performed when an array is indexed or sliced using\n  the methods described in section \ref{sec:slice}; if an index is\n  out of bounds then an \code{index\_out\_of\_bounds} exception will\n  be thrown.  
This makes indexing and slicing of arrays slower so\n  would normally only be used for debugging.\n\\end{description}\n\n\\section{Automatic differentiation capabilities specific to arrays}\nSection \\ref{sec:adjoint} described how the \\code{get\\_gradient()}\nmember function could be used to extract the gradients from a scalar\n\\code{adouble} object after applying forward- or reverse-mode\ndifferentiation. In the same way, gradients may be extracted from\nactive \\code{Array} and \\code{FixedArray} objects, returning an\ninactive \\code{Array} of the same rank and size. For example, to\ncompute the derivative of a \\code{norm2} operation, we could do the\nfollowing:\n\\begin{lstlisting}\n Stack stack;                     // Stack to store differential statements\n aVector x = {1.0, 2.0, 3.0};     // C++11 initialization\n stack.new_recording();           // Clear any stored differential statements\n aReal y = norm2(x);              // Perform operation to be differentiated\n y.set_gradient(1.0);             // Seed the independent variable\n stack.reverse();                 // Reverse-mode differentiation\n Vector dy_dx = x.get_gradient(); // Extract vector of derivatives\n\\end{lstlisting}\n\n\\section{Array thread safety}\n\\label{sec:thread}\nThere are numerous ways of obtaining an \\code{Array} that links to\ndata in another \\code{Array} object; not only the ``\\code{>>=}'' link\noperator described in section \\ref{sec:array}, but also the various\nsubsetting member functions described in section \\ref{sec:slice}, and\neven just passing arrays to and from functions. This avoids deep\ncopying and so improves efficiency. In addition to the new \\code{Array}\npointing to the same data, it also points to the same \\code{Storage}\nobject, and when a new link is created, the counter in this object\nindicating the number of objects pointing to it is incremented. This\nensures that the data will remain provided there is at least one\nobject linking to it.  
A downside of this model is that if multiple\nthreads access an array simultaneously, even if just to read it, then\nthe reference counter can become corrupted.  There are two solutions\nto this problem. \n\n\\cxx11 \\begin{leftbar} If you are using C++11 then you can define the\n  \\code{ADEPT\\_STORAGE\\_THREAD\\_SAFE} preprocessor variable, which\n  makes the reference counter in \\code{Storage} objects of type\n  \\code{std::atomic<int>} and thereby protects all operations on them\n  by a mutex. This may degrade the efficiency of your code since the\n  mutex will be redundant in single-threaded code. \\end{leftbar}\n\nAlternatively, we use the capability of arrays to access data not held\nin a \\code{Storage} object. The \\code{Array} and \\code{SpecialMatrix}\nclasses have a \\code{soft\\_link()} member function that returns an\nobject of the same type, size and activeness, which points to the same\ndata but does not contain a link to the \\code{Storage} object:\n\\begin{lstlisting}\n Matrix M(2,2);\n // ...enter multi-threaded environment\n Matrix N;\n N >>= M.soft_link();            // N links to same data as M but without Storage object\n Vector v = M.soft_link()(__,0); // v links to subset of M but without Storage object\n                                 // (recall that the copy constructor is called here) \n\\end{lstlisting}\nThe linked objects may be used in the same way as any other\n\\code{Array}. 
This is demonstrated in the\n\\code{test\\_thread\\_safe\\_arrays} test program.\n\\section{Writing an array to a stream}\nAs you would expect, an array can be written to a stream with the\n``\\code{<<}'' operator:\n\\begin{lstlisting}\n Vector v = {1, 2};            // Using C++11 initializer lists\n Matrix M = {{3, 4}, {5, 6}};  // for convenience\n std::cout << v << \"\\n\";\n std::cout << M << \"\\n\";\n\\end{lstlisting}\nwhich by default produces\n\\begin{lstlisting}\n 1 2\n 3 4\n 5 6\n\\end{lstlisting}\nYou can change the output to use curly brackets to indicate\nthe dimensions of the array as follows:\n\\begin{lstlisting}\n Vector v = {1, 2};\n Matrix M = {{3, 4}, {5, 6}};\n adept::set_array_print_style(PRINT_STYLE_CURLY);\n std::cout << \"v = \" << v << \";\\n\";\n std::cout << \"M = \" << M << \";\\n\";\n\\end{lstlisting}\nwhich produces output that looks like C/C++ code:\n\\begin{lstlisting}\n v = {1, 2};\n M =\n {{3, 4},\n  {5, 6}};\n\\end{lstlisting}\nThe available print styles for use by \\code{set\\_array\\_print\\_style}\nare \\code{PRINT\\_STYLE\\_PLAIN} (default), \\code{PRINT\\_STYLE\\_CURLY},\n\\code{PRINT\\_STYLE\\_CSV} (comma-separated values) and\n\\code{PRINT\\_STYLE\\_MATLAB} (matrix ordering indicated by Matlab-style\nsemi-colons and square brackets).\n\n\\section{Fortran interoperability}\n\\label{sec:fortran}\nThe traditional way to pass arrays between Fortran and C/C++ makes use\nof the fact that Fortran passes its ``explicit-shape'' arrays (the\ntype used since Fortran-77) to and from routines simply as a pointer\nto the first element of the array. It is then up to the receiving\nroutine to declare the size of the array correctly.  
\\Adept\\ arrays\ncan therefore be passed to Fortran routines using their \\code{data()}\nand \\code{const\\_data()} member functions, which return pointers to\nthe first element of the array.\n\nSince Fortran-90, the language also supports ``assumed-shape'' arrays,\nwhich are very much like \\Adept's \\code{Array} objects: they contain\nwithin them the extent of each array dimension, and may refer to data\nthat are strided (non-contiguous) in memory.  Fortran passes an\nassumed-shape array to subroutines and functions in the form of a\npointer to its \\emph{array descriptor} (sometimes known as a\n\\emph{dope vector}), which contains a pointer to the first element of\nthe array and information on the rank, type, and the extent and\nstride-in-memory of each dimension.\n\nThe Fortran 2018 standard defines an interface to allow assumed-shape\narrays to be passed to and from C or C++ functions.  Fortran compilers\nsupporting this standard provide a C/C++ header file\n\\code{ISO\\_Fortran\\_binding.h} that defines the array descriptor as a\nstructure \\code{CFI\\_cdesc\\_t}.  The \\Adept\\ header file\n\\code{adept\\_fortran.h} provides a class \\code{adept::FortranArray}, a\nthin wrapper to this structure, that enables an \\Adept\\ \\code{Array}\nobject to share its data with a Fortran array. This is very efficient\nas only the array descriptor information is copied, not the actual\ndata in the array.  At the time of writing, support for this\ncapability in Fortran compilers is limited.\n\nA crucial point to be aware of in all the examples that follow is\nthat \\Adept\\ indexes its arrays in row-major order starting at 0,\nwhile Fortran indexes its arrays in column-major order starting (by\ndefault) at 1. When arrays are passed between the two languages, the\nnative array convention is adopted. 
Therefore, matrix element\n\\code{A(0,10)} in \\Adept\\ would be indexed as \\code{A(11,1)} in\nFortran.\n\n\\subsection{Passing arrays from C++/Adept to Fortran}\nSuppose we have a Fortran subroutine that takes an integer array and a\nsingle-precision array as arguments:\n\\begin{lstlisting}[language=Fortran]\n ! Define a routine callable with same name in C/C++\n subroutine fortran_routine(int_array, flt_array) bind(c)\n   implicit none\n   integer(kind=4), intent(inout) :: int_array(:,:) ! Matrix of 4-byte integers\n   real(kind=4),    intent(inout) :: flt_array(:,:) ! Matrix of 4-byte real numbers\n   ! --- Body of routine here ---\n end subroutine fortran_routine\n\\end{lstlisting}\nThe following C++ program demonstrates how \\Adept\\ arrays can be\npassed to this routine:\n\\begin{lstlisting}\n #include <adept_fortran.h>\n // Declare interface to the routine, turning off C++ name mangling so that it can be linked\n // to Fortran \n extern \"C\" void fortran_routine(adept::FortranArray* int_array,\n                                 adept::FortranArray* flt_array);\n int main() {\n   // Initialize Adept matrices, using shortcuts to the types Array<2,int> and Array<2,float>\n   adept::intMatrix   int_arr = {{2, 3, 5}, {7, 11, 13}};\n   adept::floatMatrix flt_arr = {{2.0, 3.0, 5.0}, {7.0, 11.0, 13.0}};\n   // Convert Adept arrays to Fortran arrays pointing to the same data, and call the routine;\n   // the conversion to FortranArray pointers is done automatically\n   fortran_routine(adept::FortranArray(int_arr),\n                   adept::FortranArray(flt_arr));\n   return 0;\n }\n\\end{lstlisting}\nThis will fail to compile if the \\code{ISO\\_Fortran\\_binding.h} file is\nnot found. 
To link the two object files into an executable you will\nneed to use your C++ compiler, but include the relevant Fortran\nlibrary on the command line (e.g.\\ \\code{-lgfortran} if you compiled\n\\code{fortran\\_routine} with the GNU Fortran compiler, or\n\\code{-lifcore} if you used the Intel Fortran compiler).\n\n\\subsection{Passing arrays from Fortran to C++/Adept}\nWe can also pass arrays the other way. Consider the following Fortran\nprogram:\n\\begin{lstlisting}[language=Fortran]\n program test_interoperability\n   implicit none\n   ! Define interface to a function implemented in C++\n   interface\n     subroutine adept_routine(int_array, flt_array) bind(c)\n       integer(kind=4), intent(inout) :: int_array(:,:) ! Matrix of 4-byte integers\n       real(kind=4),    intent(inout) :: flt_array(:,:) ! Matrix of 4-byte real numbers\n     end subroutine adept_routine \n   end interface\n   ! Body of program starts here\n   integer(kind=4), allocatable :: imat(:,:)\n   real(kind=4),    allocatable :: fmat(:,:)  \n   ! --- Code to allocate and populate imat and fmat here ---\n   ! 
Now call the C++ function\n   call adept_routine(imat, fmat)\n end program test_interoperability\n\\end{lstlisting}\nThe routine could be implemented in C++ as follows:\n\\begin{lstlisting}\n #include <adept_fortran.h>\n extern \"C\" void adept_routine(adept::FortranArray* int_array,\n                               adept::FortranArray* flt_array) {\n   // Declare Adept arrays\n   adept::intMatrix   int_arr;\n   adept::floatMatrix flt_arr;\n   // Associate Adept arrays with Fortran data, or throw a fortran_interoperability_error\n   // exception if the rank or type do not match\n   int_arr >>= int_array;\n   flt_arr >>= flt_array;\n   // --- Operations on int_arr and flt_arr now modify the Fortran arrays ---\n }\n\\end{lstlisting}\nSince the executable now contains a Fortran source file with a\n\\code{program} statement, rather than a C++ source file defining a\n\\code{main} function, the linking step of the compilation must be\ncarried out using the Fortran compiler, but passing it the C++\nstandard library, i.e.\\ \\code{-lstdc++}.\n\nIn the example above, the \\Adept\\ arrays \\code{int\\_arr} and\n\\code{flt\\_arr} behave in the same way as ``linked'' arrays described\nin section \\ref{sec:array}: they know that they do not ``own'' the\noriginal data, so if the user then calls their \\code{clear} or\n\\code{resize} member functions, they will unlink themselves from the\nFortran arrays.  
The \\code{FortranArray} class provides no array\nfeatures itself, so must be linked to an \\code{Array} object before\nany work can be done on it, but it does provide a handful of member\nfunctions for querying its properties:\n\\begin{description}\n \\citem{int rank()} Return the number of dimensions of the array.\n \\citem{int dimension(int i)} Return the extent of dimension \\code{i}\n in memory, counting dimensions from 0 but using the Fortran ordering.\n \\citem{int offset(int i)} Return the stride in memory of dimension\n \\code{i}.\n \\citem{bool is\\_type<Type>()} Return \\code{true} if the element type\n of the array is the same as \\code{Type} (which must be a known type at\n compile time).\n \\citem{Type* data<Type>()} Return a pointer to the first element of\n the data, cast to the specified type.\n\\end{description}\n\n\\chapter{Using \\Adept's optimization functionality}\n\\label{chap:optimize}\n\n\\section{Background}\n\\label{sec:optimize}\nSince version 2.0.8, \\Adept\\ provides functionality for solving\nnon-linear optimization problems, specifically finding the state\nvector ${\\bf x}$ that minimizes the scalar cost function $J({\\bf\n  x})$ (also known as a penalty function or objective function).\n%\nA \\emph{gradient-free} minimization algorithm (e.g.\\ Nelder-Mead)\nrequires simply a user-supplied function for computing $J$, calling it\nmultiple times for different ${\\bf x}$ to find the minimum $J$.\n%\nA \\emph{first-order} minimization algorithm requires also a\nuser-supplied function returning the gradient of the cost function\n$\\partial J/{\\partial\\bf x}$ (a vector). Examples are the Conjugate Gradient\nmethod and the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno\n(L-BFGS) method. Knowing the gradient enables such algorithms to find\nthe minimum with far fewer function calls, although a function call\nreturning $\\partial J/{\\partial\\bf x}$ is slower than one returning only\n$J$. 
\\Adept's optimization interface is in terms of passive array\ntypes, so the user is not obliged to use \\Adept's automatic\ndifferentiation capability to compute these gradients, although the\nexamples in this chapter assume that they do.\n%\nA \\emph{second-order} minimization algorithm makes use of not only $J$\nand $\\partial J/{\\partial\\bf x}$, but also a user-supplied function for the\nHessian ${\\bf A}=\\partial^2J({\\bf x})/\\partial{\\bf x}^2=\\nabla_{\\bf\n  x}^2J$ (a symmetric matrix), or an approximation of it.  Examples\nare the Gauss-Newton and Levenberg-Marquardt methods. Knowing the\nsecond derivative means that even fewer iterations should be required\nto find the minimum of $J$, but ${\\bf A}$ is more expensive to compute\nthan $\\partial J/{\\partial\\bf x}$.\n\n\\Adept\\ does not have the ability to automatically compute Hessian\nmatrices for an arbitrary cost function, but frequently the cost\nfunction has a specific form that makes it possible to compute the\napproximate Hessian from the Jacobian matrix.  Consider the\noptimization problem of finding the parameters $\\x$ of non-linear\nmodel $\\y(\\x)$ that provides the closest match to a set of\n``observations'' $\\y^o$ in a least-squares sense.  For maximum\ngenerality we add constraints that penalize differences between $\\x$\nand a set of \\emph{a~priori} values $\\x^a$, as well as a\nregularization term.  
In this case the cost function could be written\nas \\def\\myspace{~~}\n\\begin{equation}\nJ(\\x) \\myspace =\\myspace \\frac12\\left[\\y(\\x)-\\y^o\\right]^\\mathrm{T}{\\bf\n  R}^{-1}\\left[\\y(\\x)-\\y^o\\right]\n\\myspace+\\myspace\\frac12\\left[\\x-\\x^a\\right]^\\mathrm{T}{\\bf\n  B}^{-1}\\left[\\x-\\x^a\\right]\n\\myspace+\\myspace\\frac12\\x^\\mathrm{T}{\\bf T}\\x.\\nonumber\n\\label{eq:objective}\n\\end{equation}\nHere, all vectors are treated as column vectors, ${\\bf R}$ is the\nerror covariance matrix of the observations, ${\\bf B}$ is the error\ncovariance matrix of the \\emph{a~priori} values, and ${\\bf T}$ is a\nTwomey-Tikhonov matrix that penalizes either spatial gradients or\ncurvature in $\\x$.  The approximate Hessian matrix is then given by\n\\begin{equation}\n{\\bf A} \\myspace\\simeq\\myspace {\\bf H}^\\mathrm{T}{\\bf\n  R}^{-1}{\\bf H}\\nonumber\n\\myspace+\\myspace {\\bf B}^{-1} \\myspace+\\myspace {\\bf T},\n\\label{eq:hessian}\n\\end{equation}\nwhich can be coded up using \\Adept\\ to compute the Jacobian matrix\n${\\bf H}=\\partial\\y/\\partial\\x$. Each term on the right-hand-side of\n(\\ref{eq:hessian}) has its corresponding term in (\\ref{eq:objective}),\nso it is easy to work out what the Hessian would look like if only a\nsubset of the terms in (\\ref{eq:objective}) were present. The first\nterm of (\\ref{eq:hessian}) is the `Gauss-Newton' approximation of the\ntrue Hessian of the first term of (\\ref{eq:objective}).  It is exact\nif $\\y(\\x)$ is linear, i.e.\\ if each element of $\\y$ could be\nrepresented as a linear combination of the elements of $\\x$. 
In many\ncases this is a good enough approximation of the Hessian for fast\nconvergence to be achieved.\n\n\\section{\\Adept\\ interface}\n\\label{sec:minimizer_interface}\nFor the purposes of demonstrating how this would be implemented in\n\\Adept\\ we simplify (\\ref{eq:objective}) down to the case of\nminimizing a quadratic function, in which case $J={\\bf\n  y}^\\mathrm{T}{\\bf y}/2$ and ${\\bf y}={\\bf x}$.  The former of these\ntwo equations means that the Hessian matrix is simply ${\\bf A}={\\bf\n  H}^\\mathrm{T}{\\bf H}$. The latter we implement using active\nvariables:\n%\n\\begin{lstlisting}\n adept::aVector calc_y(const adept::aVector& x) { return x; }\n\\end{lstlisting}\n%\nThe test program \\code{test/test\\_minimizer.cpp} uses a ${\\bf y}({\\bf\n  x})$ function for the much more interesting case of the\n$N$-dimensional Rosenbrock function.  To set up the problem ready for\nminimizing, we create a class that derives from \\Adept's\n\\code{Optimizable} class and overrides five of its virtual functions:\n%\n\\begin{lstlisting}\n // Include this header file for the functionality described in this chapter\n #include <adept_optimize.h>\n\n class SimpleOptimizable : public adept::Optimizable {\n public:\n   // Return the cost function for a given state vector x\n   virtual adept::Real calc_cost_function(const adept::Vector& x) {\n     adept::Vector y = value(calc_y(x)); // \"value\" throws away the activeness\n     return 0.5*sum(y*y);\n   }\n\n   // Calculate the cost function and its gradient from x\n   virtual adept::Real calc_cost_function_gradient(const adept::Vector& x,\n\t\t\t\t\t           adept::Vector gradient) {\n     adept::aVector xactive = x;           // Copy x to an active variable\n     stack.new_recording();\n     adept::aVector y = calc_y(xactive);   // Calculate y from x\n     adept::aReal cost = 0.5*sum(y*y);     // Calculate cost function as an active variable\n     cost.set_gradient(1.0);               // Use reverse-mode 
differentiation to\n     stack.reverse();                      //   compute the gradient\n     gradient = xactive.get_gradient();\n     return value(cost);                   // Return cost function as passive variable\n   }\n\n   // Calculate the cost function, its gradient and the approximate Hessian matrix\n   virtual adept::Real calc_cost_function_gradient_hessian(const adept::Vector& x,\n\t\t\t   adept::Vector gradient, adept::SymmMatrix& hessian) {\n     adept::aVector xactive = x;           // Copy x to an active variable\n     stack.new_recording();\n     adept::aVector y = calc_y(xactive);   // Calculate y from x\n     adept::aReal cost = 0.5*sum(y*y);     // Calculate cost function as an active variable\n     stack.independent(xactive);           // Define independent variables\n     stack.dependent(y);                   // Define dependent variables\n     adept::Matrix jac = stack.jacobian(); // Compute Jacobian matrix dy/dx\n     hessian  = jac.T() ** jac;            // Hessian is a simple matrix product of Jacobian\n     gradient = jac.T() ** value(y);       // Gradient is a matrix-vector product\n     return value(cost);                   // Return cost function as passive variable\n   }\n\n   // Every iteration this function is called: here simply report progress to standard output\n   virtual void report_progress(int niter, const adept::Vector& x,\n                                adept::Real cost, adept::Real gnorm) {\n     std::cout << \"Iteration \" << niter << \": cost function = \" << cost << \"\\n\";\n   }\n \n   // Minimization algorithm may want to check what derivatives are available: here we \n   // provide 0th (cost function alone) 1st (gradient) and 2nd (Hessian), so return true\n   // for 0, 1 or 2, false otherwise\n   virtual bool provides_derivative(int order) { return (order >= 0 && order <= 2); }\n\n // Keep an instance of the Adept stack within the class: avoids the initialization costs\n // incurred each iteration if it was inside 
calc_cost_function_gradient and\n // calc_cost_function_gradient_hessian \n private:\n   adept::Stack stack;\n };\n\\end{lstlisting}\nNote that if you plan to use a first-order minimization algorithm, you\ndo not need to provide a\n\\code{calc\\_cost\\_function\\_gradient\\_hessian} function.\n\n\\Adept's \\code{Minimizer} class can minimize the cost function\nheld in an \\code{Optimizable} object by calling the user-supplied\nvirtual functions, as follows:\n\\begin{lstlisting}\n SimpleOptimizable quadratic_function;\n adept::Minimizer minimizer(MINIMIZER_ALGORITHM_LEVENBERG); // Select minimization algorithm\n int nx = 10;         // Number of state variables\n adept::Vector x(nx); // Declare state vector\n x = 3.0;             // Initialize state vector to first guess values, all 3.0\n // Minimize the cost function:\n adept::MinimizerStatus status = minimizer.minimize(quadratic_function, x);\n // Report the convergence status:\n std::cout << \"Convergence status: \" << adept::minimizer_status_string(status) << \"\\n\";\n\\end{lstlisting}\n%\nAfter the \\code{minimize} member function is called, \\code{x} contains\nthe state vector that minimizes the cost function.\n\nThe available minimization algorithms are:\n\\begin{description}\n\\citem{MINIMIZER\\_ALGORITHM\\_CONJUGATE\\_GRADIENT} The first-order\nConjugate-Gradient algorithm performs a line search along the\nsteepest-descent direction, then uses the Polak-Ribi\\`ere formula to\ncompute subsequent search directions that are conjugate to the\nprevious $N$ directions, where $N$ is the number of state\nvariables. The Conjugate-Gradient method is the most memory efficient,\nso suitable for problems with large $N$. The line search first\nbrackets the minimum then fits a cubic polynomial to the values and\ngradients at the bounding points to find the best estimate of the next\nsearch point. 
The Wolfe conditions are applied to determine whether\nthe cost function along the search direction has been sufficiently\nminimized.\n%\n\\citem{MINIMIZER\\_ALGORITHM\\_CONJUGATE\\_GRADIENT\\_FR} As above but\nusing the Fletcher-Reeves formula to compute new search directions.\n%\n\\citem{MINIMIZER\\_ALGORITHM\\_LIMITED\\_MEMORY\\_BFGS} The first-order\nLimited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm\nuses a limited number of previous search directions (default 6) to\nbuild up an approximation to the inverse of the Hessian matrix,\nenabling it to make a better estimate of the location of the minimum\nof the cost function, but with a slightly higher memory\nfootprint. Note that the full inverse Hessian is not computed\nexplicitly so this method is still efficient in memory for large $N$.\n%\n\\citem{MINIMIZER\\_ALGORITHM\\_LEVENBERG} The second-order Levenberg\nalgorithm tries to perform a Gauss-Newton step using the approximate\nHessian matrix and assuming that the curvature of $J$ is locally\nconstant. If $J$ at the new ${\\bf x}$ is not reduced by this step then\na damping parameter $\\lambda$ is used to scale between Gauss-Newton\nand a steepest-descent algorithm.\n%\n\\citem{MINIMIZER\\_ALGORITHM\\_LEVENBERG\\_MARQUARDT} The second-order\nLevenberg-Marquardt algorithm is similar to the Levenberg algorithm,\nbut scales such that the step sizes are changed in each dimension\naccording to the curvature of the cost function in that dimension\n(i.e.\\ the diagonal of the Hessian matrix). This tends to result in\nfaster convergence than the Levenberg algorithm for problems with very\ndifferent scaling for each element of the state vector.\n\\end{description}\nThe minimizer can be configured in detail by calling its member\nfunctions listed in section \\ref{sec:minimizer_options}.  
The possible\nvalues for the return status are given in section\n\\ref{sec:minimizer_status}.\n\nThe case above is an example of \\emph{unconstrained minimization}: the\nminimizer is free to try any values of ${\\bf x}$. This can lead to it\ntrying unphysical values, such as negative values for a quantity that\ncannot be negative.  To prevent this, all the minimization algorithms\nallow the user to specify simple box constraints on the elements of\nthe state vector.  Suppose we wanted to constrain element 0 to be\npositive and element 1 to lie in the range 10--20, we would add these\nlines:\n\\begin{lstlisting}\n // Declare vectors containing the lower and upper bounds on x\n adept::Vector x_lower, x_upper;\n // Set them to the minimum and maximum possible values for their element type (e.g. double)\n adept::minimizer_initialize_bounds(nx, x_lower, x_upper);\n // Set a lower bound on element 0 and both bounds on element 1\n x_lower(0) = 0.0;\n x_lower(1) = 10.0;\n x_upper(1) = 20.0;\n // Call the minimize function with two extra arguments specifying the bounds\n status = minimizer.minimize(quadratic_function, x, x_lower, x_upper);\n\\end{lstlisting}\n\n% minimizer.set_max_iterations(100);\n% minimizer.set_converged_gradient_norm(0.1);\n\n\\section{Other member functions of the \\code{Minimizer} class}\n\\label{sec:minimizer_options}\nIn addition to the \\code{minimize} member function described in\nsection \\ref{sec:minimizer_interface}, the following\n\\code{adept::Minimizer} member functions may be called to configure\nthe behaviour of the minimization algorithm:\n\\begin{description}\n\\citem{void set\\_algorithm(MinimizerAlgorithm algo)} Set the algorithm\nto one of the available minimization algorithms,\ne.g.\\ \\code{MINIMIZER\\_ALGORITHM\\_LEVENBERG}. 
This is an alternative\nto providing it as an argument to the \\code{Minimizer} constructor.\n%\n\\citem{void set\\_algorithm(const std::string\\&\\ algo)} Set the\nalgorithm using a string, which may be one of ``\\code{L-BFGS}'',\n``\\code{Conjugate-Gradient}'', ``\\code{Conjugate-Gradient-FR}'',\n``\\code{Levenberg}'' or ``\\code{Levenberg-Marquardt}''.  Note that\nthis function is case-insensitive, and will also accept spaces or\nunderscores in place of hyphens.\n%\n\\citem{void set\\_max\\_iterations(int max\\_it)} Set the maximum number\nof iterations (default 100).\n%\n\\citem{void set\\_converged\\_gradient\\_norm(Real cgn)} The L2-norm of\nthe $\\partial J/\\partial{\\bf x}$ vector is computed each iteration,\nand convergence is deemed to have been achieved when it falls below\nthe value specified here (default 0.1).\n%\n\\citem{void set\\_max\\_step\\_size(Real mss)} Set the maximum step size\neach iteration (default: no maximum). A negative or zero value\nindicates that no maximum step size is to be used.\n%\n\\citem{void ensure\\_updated\\_state(int order = 2)} Often the user will\nrequire the Hessian matrix to compute errors in the solution, and will\nstore the Hessian matrix each time the\n\\code{calc\\_cost\\_function\\_gradient\\_hessian} function is\ncalled. However, by default there is no guarantee that when the\nminimization has completed this function will have been called with the\nfinal version of the state vector.  Calling the member function here\nrequests that after minimization is complete, the derivatives of at\nleast the specified \\code{order} are consistent with the final state\nvector (e.g.\\ 2 for both the Hessian and the gradient vector).\n%\n\\citem{set\\_max\\_line\\_search\\_iterations(int mi)} Set the maximum\nnumber of iterations to perform in a line search (default 10). 
The\nsame value is used by the Conjugate-Gradient and L-BFGS methods.\n%\n\\citem{set\\_armijo\\_coeff(Real ac)} The first of the Wolfe conditions\ndetermines how much of a decrease in the cost function is satisfactory\nfor a line search to complete, controlled by the Armijo coefficient\n(default $10^{-4}$). The same value is used by the Conjugate-Gradient\nand L-BFGS methods.\n%\n\\citem{set\\_lbfgs\\_curvature\\_coeff(Real lcc)} The second Wolfe\ncondition is that the magnitude of the gradient in the search\ndirection is reduced by a certain amount determined by the curvature\ncoefficient, the optimum value of which is different for the\nConjugate-Gradient and L-BFGS methods. The default for the L-BFGS\nmethod is 0.9.\n%\n\\citem{set\\_cg\\_curvature\\_coeff(Real cgcc)} The curvature coefficient\nto use for the Conjugate-Gradient method (default 0.1).\n%\n\\citem{void set\\_levenberg\\_damping\\_limits(Real damp\\_min, Real\n  damp\\_max)} Set the minimum and maximum positive values of the\ndamping parameter $\\lambda$ used by both the Levenberg and\nLevenberg-Marquardt algorithms (default $1/128$ and\n$10^5$). 
Internally the algorithm can still use zero when each\niteration is reducing the cost function.\n%\n\\citem{void set\\_levenberg\\_damping\\_start(Real damp\\_start)} Set the\ninitial damping factor for the Levenberg and Levenberg-Marquardt\nalgorithms (default 0).\n%\n\\citem{void set\\_levenberg\\_damping\\_restart(Real damp\\_restart)} Set\nthe value of the damping factor $\\lambda$ in the Levenberg and\nLevenberg-Marquardt algorithms that is used when a value of\n$\\lambda=0$ does not result in the cost function being reduced\n(default $1/4$).\n%\n\\citem{void set\\_levenberg\\_damping\\_multiplier(Real damp\\_multiply,\n  Real damp\\_divide)} Set the multiplier and divider that will be used\nto scale the damping factor when an iteration does not and does reduce\nthe cost function, respectively (default 2.0 and 5.0).\n\\end{description}\nThe following member functions return the minimizer algorithm that the\n\\code{Minimizer} is currently configured to use:\n\\begin{description}\n\\citem{MinimizerAlgorithm algorithm()} Return the enumeration\nrepresenting the minimization algorithm.\n\\citem{std::string algorithm\\_name()} Return a string representing the\nminimization algorithm.\n\\end{description}\nThe following member functions extract information about the progress\nof the minimization after it has completed:\n\\begin{description}\n\\citem{int n\\_iterations()} Return the number of iterations performed.\nOnly iterations that successfully reduced the cost function are\ncounted.\n%\n\\citem{int n\\_samples()} Return the number of times the cost\nfunction was computed, including times when this did not reduce the\ncost function.\n%\n\\citem{Real cost\\_function()} Return the final value of the cost\nfunction.\n%\n\\citem{Real gradient\\_norm()} Return the final value of the norm of\nthe $\\partial J/\\partial{\\bf x}$ vector.\n%\n\\citem{Real start\\_cost\\_function()} Return the cost function for the\nfirst guess of the state vector provided by the 
user.\n%\n\\citem{MinimizerStatus status()} Return the convergence status.\n\\end{description}\n\n\n\\section{Return status for minimization}\n\\label{sec:minimizer_status}\nThe following enumerations may be returned by \\code{Minimizer}'s\n\\code{minimize} member function representing the status of the\nminimization. The \\code{adept::minimizer\\_status\\_string} function\nconverts a status to a user-readable string, as demonstrated\nin one of the examples in section \\ref{sec:minimizer_interface}.\n\\begin{description}\n\\citem{MINIMIZER\\_STATUS\\_SUCCESS} Minimization was successful.\n%\n\\citem{MINIMIZER\\_STATUS\\_EMPTY\\_STATE}  The state vector provided is empty.\n%\n\\citem{MINIMIZER\\_STATUS\\_MAX\\_ITERATIONS\\_REACHED} The maximum number\nof iterations was reached.\n%\n\\citem{MINIMIZER\\_STATUS\\_FAILED\\_TO\\_CONVERGE} Convergence was not\nachieved, even though some progress may have been made in minimizing\nthe cost function. This usually occurs when, in the vicinity of the\nminimum, the $J({\\bf x})$ terrain is quite flat and numerical errors\nmean that the gradient returned from the user-supplied function does\nnot point uphill as it should.  This means that when the algorithm\nuses the gradient to try to go downhill it finds the cost function\nincreasing.\n%\n\\citem{MINIMIZER\\_STATUS\\_INVALID\\_COST\\_FUNCTION} The cost function\nreturned is NaN or infinity. This is usually solved by using bounded\nminimization to ensure that the values of ${\\bf x}$ are kept within\nphysically reasonable bounds.\n\\citem{MINIMIZER\\_STATUS\\_INVALID\\_GRADIENT} The gradient vector\nreturned contains NaN or infinity values. 
Use bounded minimization.\n%\n\\citem{MINIMIZER\\_STATUS\\_INVALID\\_BOUNDS} The bounds requested are\nnot valid, for instance a maximum bound was requested that is less\nthan the minimum bound.\n%\\citem{MINIMIZER\\_STATUS\\_NUMBER\\_AVAILABLE}\n%\\citem{MINIMIZER\\_STATUS\\_NOT\\_YET\\_CONVERGED\n\\end{description}\n\n\n\\chapter{General considerations}\n\\label{chap:gen}\n\n\\section{Setting and checking the global configuration}\n\\label{sec:settings}\n\\noindent The following non-member functions are provided in the\n\\code{adept} namespace:\n\\begin{description}\n\\citem{std::string version()} Returns a string containing the version\nnumber of the \\Adept\\ library (e.g. ``\\code{2.0.8}'').\n\\citem{std::string compiler\\_version()} Returns a string containing\nthe compiler name and version used to compile the \\Adept\\ library.\n\\citem{std::string compiler\\_flags()} Returns a string containing the\ncompiler flags used when compiling the \\Adept\\ library.\n\\citem{std::string configuration()} Returns a multi-line string\nlisting numerous aspects of the way \\Adept\\ has been configured.\n\\citem{bool have\\_matrix\\_multiplication()} Returns \\code{true} if the\nAdept library has been compiled with BLAS support, \\code{false}\notherwise.\n\\citem{bool have\\_linear\\_algebra()} Returns \\code{true} if the\nAdept library has been compiled with LAPACK support, \\code{false}\notherwise.\n\\citem{int set\\_max\\_blas\\_threads(int n)} Set the maximum number of\nthreads used for matrix operations by the BLAS library, or zero to use\nthe upper limit on your system. The number returned is the number\nactually used.  
\n\\citem{int max\\_blas\\_threads()} Return the maximum number of\nthreads available for matrix operations by the BLAS library.\n%\n\\end{description}\n\nThe preprocessor can detect the \\Adept\\ version at compile time via the\n\\code{ADEPT\\_VERSION} preprocessor variable, which is an integer\nvariable with the digits $abbcc$ corresponding to \\Adept\\ version\n$a.bb.cc$. This could be used to activate a different compile path\ndependent on the version, or even to fail to compile if the version is\nnot recent enough:\n\\begin{lstlisting}\n #if ADEPT_VERSION < 10910\n #error \"Adept >= 1.9.10 is required by this program\"\n #endif\n\\end{lstlisting}\n\n\\section{Parallelizing \\Adept\\ programs}\n\\Adept\\ currently has limited built-in support for parallelization. If\nthe algorithms that you wish to differentiate are individually small\nenough to be treated by a single processor core, and you wish to\ndifferentiate multiple algorithms independently (or the same algorithm\nbut with multiple sets of inputs) then parallelization is\nstraightforward. This is because the global variable containing a\npointer to the \\Adept\\ stack uses thread-local storage.  This means\nthat if a process spawns multiple threads (e.g.\\ using OpenMP or\nPthreads) then each thread can declare one \\code{adept::Stack} object\nand all \\code{adouble} operations will result in statements being\nstored on the stack object specific to that thread.  The\n\\Adept\\ package contains a test program \\code{test\\_thread\\_safe} that\ndemonstrates this approach in OpenMP.\n\nIf your problem is larger and you wish to use parallelism to speed-up\nthe differentiation of a single large algorithm then the built-in\nsupport is more limited. Provided your program and the \\Adept\\ library\nwere compiled with OpenMP enabled (which is the default for the\n\\Adept\\ library if your compiler supports OpenMP), the computation of\nJacobian matrices will be parallelized.  
By default, the maximum\nnumber of concurrent threads will be equal to the number of available\ncores, but this can be overridden with the\n\\code{set\\_max\\_jacobian\\_threads} member function of the \\code{Stack}\nclass.  Note that the opportunity for speed-up depends on the size of\nyour Jacobian matrix: for an $m\\times n$ matrix, the number of\nindependent passes through the stored data is $\\mathrm{min}(m,n)$ and\neach thread treats \\code{ADEPT\\_MULTIPASS\\_SIZE} of them (see section\n\\ref{sec:configuring_lib}), so the maximum number of threads that can\nbe exploited is $\\mathrm{min}(m,n)/$\\code{ADEPT\\_MULTIPASS\\_SIZE}.\nAgain, the \\code{test\\_thread\\_safe} program can demonstrate the\nparallelization of Jacobian calculations.  Note, however, that if the\n\\code{jacobian} function is called from within an OpenMP thread\n(e.g.\\ if the program already uses OpenMP with each thread containing\nits own \\code{adept::Stack} object), then the program is likely not to\nbe able to spawn more threads to assist with the Jacobian calculation.\n\nIf you need Jacobian matrices then the ability to parallelize the\ncalculation of them is useful since this tends to be more\ncomputationally costly than recording the original algorithm.  If you\nonly require the tangent-linear or adjoint calculations (equivalent to\na Jacobian calculation with $n=1$ or $m=1$, respectively), then\nunfortunately you are stuck with single threading. It is intended that\na future version of \\Adept\\ will enable all aspects of differentiating\nan algorithm to be parallelized with either or both of OpenMP and MPI.\n\nIf your BLAS library has support for parallelization then be aware\nthat the performance may be poor if other parts of the program are\nparallelized.  This occurs with OpenBLAS, which uses Pthreads, if you\nalso use parallelized Jacobian calculations, which use OpenMP.  
In\nthis instance you can turn off parallelization of array operations\nwith the \\code{set\\_max\\_blas\\_threads(1)} function in the\n\\code{adept} namespace.  The number of available threads for array\noperations is returned by the \\code{max\\_blas\\_threads()} function.\nAlternatively, you can use the \\code{OPENBLAS\\_NUM\\_THREADS}\nenvironment variable to control the number of threads used by\nOpenBLAS, and the \\code{OMP\\_NUM\\_THREADS} environment variable to\ncontrol the number used in Jacobian calculations.\n\n\n\\section{The fast exponential function}\n\\label{sec:fastexp}\n\\Adept\\ was originally developed for algorithms that make frequent\ncalls to the exponential function \\code{exp}, but unfortunately most\ncompilers do not vectorize \\code{exp}.  Therefore, \\Adept\\ provides\nthe function \\code{fastexp} in the \\code{adept} namespace, which can\noperate on active and passive scalars and array arguments (including\nthe simple \\code{float} and \\code{double}) just like \\code{exp}.  It\nuses an adapted form of an algorithm from Agner Fog's Vector Class\nLibrary (VCL) that is around a third faster for scalar arguments, but\ncan be vectorized making it as much as 10 times faster when applied to\n\\Adept\\ arrays depending on the instruction set available.  It is\naccurate but not bit-reproducible with \\code{exp} and produces finite\nresults for a slightly smaller range of input values: from $-87.3$ to\n$+89.0$ for \\code{float} arguments and from $-708.39$ to $+709.70$ for\n\\code{double} arguments.\n\nIf you have an existing code that calls \\code{exp} with \\Adept\\ types\nas arguments, and wish to use the faster algorithm for all of them,\nthen simply compile your code with \\code{-DADEPT\\_FAST\\_EXPONENTIAL}.\nThis will not change the behaviour of \\code{exp} for other types of\narguments, which would typically use the version from the C++ standard\nlibrary. 
If you compile your code with\n\\code{-DADEPT\\_FAST\\_SCALAR\\_EXPONENTIAL} then a fast exponential\nfunction \\code{adept::exp} will be defined that works on arguments of\ntype \\code{float} and \\code{double}.  However, this can cause a\nnamespace clash as some C header files import \\code{exp} from the\nstandard library outside of any namespace.\n\n\\section{Tips for the best performance}\n\\label{sec:tips}\n\\begin{itemize}\n\\item If you are working with single-threaded code, or in a\n  multi-threaded program but with only one thread using a Stack\n  object, then you can get slightly faster code by compiling all of\n  your code with \\code{-DADEPT\\_STACK\\_THREAD\\_UNSAFE}. This uses a\n  standard (i.e. non-thread-local) global variable to point to the\n  currently active stack object, which is slightly faster to access.\n\\item If you compile with the \\code{-g} option to store debugging\n  symbols, your object files and executable will be much larger\n  because every mathematical statement in the file will have the name\n  of its associated templated type stored in the file, and these names\n  can be long. Once you have debugged your code, you may wish to omit\n  debugging symbols from production versions of the executable, or\n  reduce the level of detail with \\code{-g1} (on the GNU C++\n  compiler).  There is typically no performance penalty associated\n  with including debugging symbols.\n\\item A high compiler optimization setting is recommended to inline\n  the function calls associated with mathematical expressions.  
On the\n  GNU C++ compiler, the \\code{-O3 -march=native} setting is\n  recommended.\n\\item As outlined in the previous section, if you use the \\code{exp}\n  function then you can replace these calls with the faster \\code{fastexp}\n  function, or compile your code with\n  \\code{-DADEPT\\_FAST\\_EXPONENTIAL}.\n\\item On Intel and ARM architectures, \\Adept\\ will use the SSE2, AVX,\n  AVX512 or NEON instruction sets (depending on availability) to\n  vectorize array expressions that satisfy a number of requirements:\n  (1) they contain only elementary mathematical operators (including\n  the functions \\code{sqrt}, \\code{max}, \\code{min} and\n  \\code{fastexp}), (2) the arrays in the expression are either all of\n  type \\code{float} or all of type \\code{double}, (3) all the arrays\n  in the expression must have their final dimension increasing in\n  memory with no stride, and (4) none of the arrays are active. On the\n  GNU compiler the \\code{-march=native} option selects the best available\n  instruction set, but you can select a specific set with\n  \\code{-msse2}, \\code{-mavx} or \\code{-mavx512f}. With the SSE2 and\n  NEON instruction sets, 2 \\code{double}s or 4 \\code{float}s are\n  operated on at once, for AVX these rise to 4 and 8 respectively, and\n  for AVX512 they rise to 8 and 16 respectively.\n\\item By default the Jacobian functions are compiled to process a\n  strip of rows or columns of the Jacobian matrix at once. The optimum\n  width of the strip depends on your platform, and you may wish to\n  change it. To make the Jacobian functions process \\textit{n} rows or\n  columns at once, recompile the \\Adept\\ library with\n  \\code{-DADEPT\\_MULTIPASS\\_SIZE=}\\textit{n}.\n\\item If you suspect memory usage is a problem, you may investigate\n  the memory used by \\Adept\\ by simply sending your \\code{Stack} object to a\n  stream, e.g. ``\\code{std::cout \\textless\\textless\\ stack}''. 
You may\n  also use the \\code{memory()} member function, which returns the\n  total number of bytes used. Further details of similar functions are\n  given in section \\ref{sec:stack}.\n\\end{itemize}\n\n\\section{Exceptions thrown by the \\Adept\\ library}\n\\label{sec:exceptions}\nSome functions in the \\Adept\\ library can throw exceptions, and the\nexceptions that can be thrown are typically derived from either\n\\code{adept::autodiff\\_exception} or\n\\code{adept::array\\_exception}. These classes are derived from\n\\code{adept::exception}, which is itself derived from\n\\code{std::exception}. Most indicate an error in the user's code,\nusually associated with calling \\Adept\\ functions in the wrong order.\n\nAn overly comprehensive exception-catching implementation that takes\ndifferent actions depending on whether a specific \\Adept\\ exception,\nan exception related to automatic differentiation, a general\n\\Adept\\ exception, or a non-\\Adept\\ exception is thrown, could have\nthe following form:\n%\n\\begin{lstlisting}\n try {\n   adept::Stack stack;\n   // ... Code using the Adept library goes here ...\n }\n catch (adept::stack_already_active& e) {\n   // Catch a specific Adept exception\n   std::cerr << \"Error: \" << e.what() << std::endl;\n   // ... any further actions go here ...\n }\n catch (adept::autodiff_exception& e) {\n   // Catch any Adept exception related to automatic differentiation not yet caught\n   std::cerr << \"Error: \" << e.what() << std::endl;\n   // ... any further actions go here ...\n }\n catch (adept::exception& e) {\n   // Catch any other Adept exception not yet caught\n   std::cerr << \"Error: \" << e.what() << std::endl;\n   // ... any further actions go here ...\n }\n catch (...) {\n   // Catch any exceptions not yet caught\n   std::cerr << \"An error occurred\" << std::endl;\n   // ... 
any further actions go here ...\n }\n\\end{lstlisting}\n%\nAll exceptions implement the \\code{what()} member function, which\nreturns a \\code{const char*} containing an error message. \n\n\\subsection{General exceptions}\nThe following exceptions are not specific to arrays or automatic\ndifferentiation and inherit directly from \\code{adept::exception}:\n\\begin{description}\n\\citem{feature\\_not\\_available} This exception is thrown by deprecated\nfunctions, such as \\code{Stack::start()}. It is also thrown by\nfunctions that are not available because a certain library is not\nbeing used, such as \\code{inv} if \\Adept\\ was compiled without LAPACK\nsupport, or matrix multiplication via the `\\code{**}' pseudo-operator\nif \\Adept\\ was compiled without BLAS support.\n\\end{description}\n\n\\subsection{Automatic-differentiation exceptions}\nThe following exceptions relate to automatic differentiation (the\nfunctionality described in chapter \\ref{chap:ad}), and all are in the\n\\code{adept} namespace:\n\\begin{description}\n\\citem{gradient\\_out\\_of\\_range} This exception can be thrown by the\n\\code{adouble::get\\_gradient} member function if the index to its\ngradient is larger than the number of gradients stored.  This can\nhappen if the \\code{adouble} object was created after the first\n\\code{adouble::set\\_gradient} call since the last\n\\code{Stack::new\\_recording} call. The first\n\\code{adouble::set\\_gradient} call signals to the \\Adept\\ stack that\nthe main algorithm has completed and so memory can be allocated to\nstore the gradients ready for a forward or reverse pass through the\ndifferential statements. 
If further \\code{adouble} objects are created\nthen they may have a gradient index that is out of range of the memory\nallocated.\n%\n\\citem{gradients\\_not\\_initialized} This exception can be thrown by\nfunctions that require the list of working gradients to have been\ninitialized (particularly the functions\n\\code{Stack::compute\\_tangent\\_linear} and\n\\code{Stack::compute\\_adjoint}). This initialization occurs when\n\\code{adouble::set\\_gradient} is called.\n%\n\\citem{stack\\_already\\_active} This exception is thrown when an\nattempt is made to make a particular \\code{Stack} object ``active'',\nbut there already is an active stack in this thread. This can be\nthrown by the \\code{Stack} constructor or the \\code{Stack::activate}\nmember function.\n%\n\\citem{dependents\\_or\\_independents\\_not\\_identified} This exception\nis thrown when an attempt is made to compute a Jacobian but the\nindependents and/or dependents have not been identified.\n%\n\\citem{wrong\\_gradient} This exception is thrown by the\n\\code{adouble::append\\_derivative\\_dependence} function if the \\code{adouble}\nobject that it is called from is not the same as that of the most\nrecent \\code{adouble::add\\_derivative\\_dependence}. \n%\n\\citem{non\\_finite\\_gradient} This exception is thrown if the user's\ncode is compiled with the preprocessor variable\n\\code{ADEPT\\_TRACK\\_NON\\_FINITE\\_GRADIENTS} defined, and a\nmathematical operation is carried out for which the derivative is not\nfinite. This is useful to locate the source of non-finite derivatives\ncoming out of an algorithm.\n\\end{description}\n\n\\subsection{Array exceptions}\n\\label{sec:array_exceptions}\nThe following exceptions relate to arrays (the functionality described\nin chapter \\ref{chap:arrays}), and all are in the \\code{adept}\nnamespace:\n\\begin{description}\n\\citem{size\\_mismatch} A mathematical operation taking two arguments\nhas been applied to array expressions that are not of the same\nsize. 
The same exception is thrown if an array expression is applied\nto an array of a different size.\n\\citem{inner\\_dimension\\_mismatch} Matrix multiplication has been\nattempted with arrays whose inner dimensions don't agree.\n\\citem{empty\\_array} An empty array has been used in an operation when\na non-empty array is required; for example, if an attempt is made to\nlink an array to an empty array (see section \\ref{sec:array} for more\ninformation on linking).\n\\citem{invalid\\_dimension} Attempt to create an array with a negative\ndimension.\n\\citem{index\\_out\\_of\\_bounds} An element or range of elements has\nbeen requested from an array but one of the indices provided is out of\nrange; for a dimension of length $n$, the index is not in the range\n$0$ to $n-1$. Note that bounds checking is only applied if the\npreprocessor variable \\code{ADEPT\\_BOUNDS\\_CHECKING} is defined.\n%\\citem{invalid\\_lvalue}\n\\citem{invalid\\_operation} An invalid operation has been performed\nthat can only be detected at run-time, for example, calling the\n\\code{diag\\_submatrix} member function of a non-square rank-2\n\\code{Array}.\n\\citem{matrix\\_ill\\_conditioned} An attempt has been made to factorize\nan ill-conditioned matrix (either via \\code{solve} or \\code{inv}).\n\\citem{fortran\\_interoperability\\_error} An attempt has been made to\nassociate an \\Adept\\ \\code{Array} with a \\code{FortranArray} of the\nwrong rank or type.\n\\end{description}\n\n\\section{Configuring the behaviour of \\Adept}\n\\label{sec:configuring}\nThe behaviour of the \\Adept\\ library can be changed by defining one or\nmore of the \\Adept\\ preprocessor variables. This can be done either by\nediting the \\code{adept/base.h} file and uncommenting the relevant\n\\code{\\#define} lines, or by compiling your code with \\code{-Dxxx}\ncompiler options (replacing \\code{xxx} by the relevant preprocessor\nvariable). 
There are two types of preprocessor variable: the first\ntypes only apply to the compilation of user code, while the second\ntypes require the \\Adept\\ library to be recompiled.\n\n\\subsection{Modifications not requiring a library recompile}\n\\label{sec:configuring_no_lib}\nThe preprocessor variables that apply only to user code and do not\nrequire the \\Adept\\ library to be recompiled are as follows:\n\\begin{description}\n\\citem{ADEPT\\_STACK\\_THREAD\\_UNSAFE} If this variable is defined, the\ncurrently active stack is stored as a global variable but is not\ndefined to be ``thread-local''. This is slightly faster, but means\nthat you cannot use multi-threaded code with separate threads holding\ntheir own active \\code{Stack} object. Note that although defining this\nvariable does not require a library recompile, all source files that\nmake up a single executable must be compiled with this option (or all\nnot be).\n%\n\\citem{ADEPT\\_RECORDING\\_PAUSABLE} This option enables an algorithm to\nbe run both with and without automatic differentiation from within the\nsame program via the functions \\code{Stack::pause\\_recording()} and\n\\code{Stack::continue\\_recording()}.  Note that although defining this\nvariable does not require a library recompile, all source files that\nmake up a single executable must be compiled with this option (or all\nnot be). Further details on this option are provided in section\n\\ref{sec:pausable}.\n%\n\\citem{ADEPT\\_NO\\_AUTOMATIC\\_DIFFERENTIATION} This option turns off\nautomatic differentiation by treating \\code{adouble} objects as\n\\code{double}. It is useful if you want to compile one source file\ntwice to produce versions with and without automatic\ndifferentiation. 
Further details on this option are provided in\nsection \\ref{sec:multipleobjects}.\n%\n\\citem{ADEPT\\_TRACK\\_NON\\_FINITE\\_GRADIENTS} Often when an algorithm\nis first converted to use an operator-overloading automatic\ndifferentiation library, the gradients come out as Not-a-Number or\nInfinity. The reason is often that the algorithm contains operations\nfor which the derivative is not finite (e.g.\\ $\\sqrt{a}$ for $a=0$),\nor constructions where a non-finite value is produced but subsequently\nmade finite (e.g.\\ $\\exp(-1.0/a)$ for $a=0$). Usually the algorithm\ncan be recoded to avoid these problems, if the location of the\nproblematic operations can be identified. By defining this\npreprocessor variable, a \\code{non\\_finite\\_gradient} exception will\nbe thrown if any operation results in a non-finite derivative. Running\nthe program within a debugger (and ensuring that the exception is not\ncaught within the program) enables the offending line to be\nidentified.\n%\n\\citem{ADEPT\\_INITIAL\\_STACK\\_LENGTH} This preprocessor variable is\nset to an integer, and is used as the default initial amount of memory\nallocated for the recording, in terms of the number of statements and\noperations.\n%\n\\citem{ADEPT\\_REMOVE\\_NULL\\_STATEMENTS} If many variables in your code\nare likely to be zero then redundant operations will be added to the\nlist of differential statements. For example, the assignment\n$a=b\\times c$ with active variables $b$ and $c$ both being zero\nresults in the differential statement $\\delta a=0\\times\\delta\nb+0\\times\\delta c$. This preprocessor variable checks for zeros and\nremoves terms on the right-hand-side of differential statements if it\nfinds them. In this case it would put $\\delta a=0$ on the stack\ninstead. This option slows down the recording stage, but speeds up the\nsubsequent use of the recorded stack for adjoint and Jacobian\ncalculations. 
The speed-up of the latter is only likely to exceed the\nslow-down of the former if your code contains many zeros. For most\ncodes, this option causes a net slow-down.\n%\n\\citem{ADEPT\\_COPY\\_CONSTRUCTOR\\_ONLY\\_ON\\_RETURN\\_FROM\\_FUNCTION} In\n\\Adept\\ 1.1 this enabled a small but unsafe optimization. It now has\nno effect.\n%\n\\citem{ADEPT\\_BOUNDS\\_CHECKING} If this variable is defined, check\nthat all array indices are within the bounds of the array throwing an\n\\code{index\\_out\\_of\\_bounds} exception if necessary.  If this\nvariable is not defined then these checks are not performed, which is\nfaster but means that attempts to access arrays out of bounds will\nresult in either corruption of other memory used by the process, or a\nsegmentation fault. \n\\citem{ADEPT\\_NO\\_ALIAS\\_CHECKING} This variable turns off alias\nchecking, which results in faster code, but may lead to unexpected\nresults if the right-hand-side of an array statement shares data with\nthe left-hand-side of the expression. If this is likely for a\nparticular statement then use the \\code{eval} function, described in\nsection \\ref{sec:bounds}.\n\\citem{ADEPT\\_NO\\_DIMENSION\\_CHECKING} This variable turns off\nchecking the dimensions match when an array expression is assigned to\nanother array.\n\\citem{ADEPT\\_STORAGE\\_THREAD\\_SAFE} This variable ensures that\naccesses to the reference counter in \\code{Storage} objects are\natomic, enabling the \\code{Array} and \\code{SpecialMatrix} objects\nthat use them to be accessed safely in a multi-threaded\nenvironment. Note that this may incur a performance penalty, and is\nonly available in C++11. See section \\ref{sec:thread}.\n\\citem{ADEPT\\_INIT\\_REAL\\_SNAN} To detect errors caused by use of\nuninitialized data, initialize floating point arrays and active\nscalars with signaling NaNs.  
This is typically accompanied by\ndirecting the program to fail with a floating-point exception if a NaN\nis used in an expression, achieved by adding the following to one of\nthe program source files:\n\\begin{lstlisting}\n #include <fenv.h>\n int _feenableexcept_status = feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);\n\\end{lstlisting}\nIt should then be possible to use a debugger to identify the array\nthat was read before being initialized with real numbers.\n\\end{description}\n\n\\subsection{Modifications requiring a library recompile}\n\\label{sec:configuring_lib}\n\\noindent The preprocessor variables that require the \\Adept\\ library\nto be recompiled are as follows. Note that if these variables are used\nthey must be the same when compiling both the library and the user\ncode. This is safest to implement by editing section 2 of the\n\\code{adept/base.h} header file.\n\\begin{description}\n\\citem{ADEPT\\_REAL\\_TYPE\\_SIZE} If you want to compile \\Adept\\ to use\na precision other than double for the \\code{Real} type, and hence for\nautomatic differentiation, then define this preprocessor variable to\nbe \\code{4} (for \\code{float}), \\code{8} (for \\code{double}) or\n\\code{16} (for \\code{long double}). This will also change the default\nfloating-point type for arrays, including shortcuts such as\n\\code{Vector}, \\code{Matrix}, \\code{SymmMatrix}. Note that if you\nspecify \\code{16} but your compiler cannot support it\n(i.e.\\ \\code{sizeof(long double)==8}) then \\Adept\\ would produce\nsub-optimal code so will fail to compile.\n%\n\\citem{ADEPT\\_STACK\\_STORAGE\\_STL} Use the C++ standard template\nlibrary \\code{vector} or \\code{valarray} classes for storing the\nrecording and the list of gradients, rather than dynamically allocated\narrays. 
In practice, this tends to slow down the code.\n%\n\\citem{ADEPT\\_MULTIPASS\\_SIZE} This is set to an integer, invariably a\npower of two, specifying the number of rows or columns of a Jacobian\nthat are calculated at once. The optimum value depends on the platform\nand the capability of the compiler to optimize loops whose length is\nknown at compile time.\n% \n\\citem{ADEPT\\_MULTIPASS\\_SIZE\\_ZERO\\_CHECK} This is also set to an\ninteger; if it is greater than \\codebf{ADEPT\\_MULTIPASS\\_SIZE}, then\nthe \\code{Stack::jacobian\\_reverse} function checks gradients are\nnon-zero before using them in a multiplication.\n%\n\\citem{ADEPT\\_THREAD\\_LOCAL} This can be used to specify the way that\nthread-local storage is declared by your compiler.  Thread-local\nstorage is used to ensure that the \\Adept\\ library is thread-safe. By\ndefault this variable is not defined initially, and then later in\n\\code{adept/base.h} it is set to an appropriate value on your system:\n\\code{thread\\_local} if you compile with the C++11 standard, otherwise\n\\code{\\_\\_declspec(thread)} on Microsoft Visual C++, an empty\ndeclaration on Mac (since thread-local storage is not available on\nmany Mac platforms) and \\code{\\_\\_thread} otherwise (appropriate for\nat least the GNU, Intel, Sun and IBM compilers). To override the\ndefault behaviour, define this variable yourself in\n\\code{adept/base.h}.\n\\end{description}\n\n\\section{Frequently asked questions}\n\\label{sec:faq}\n\\begin{description}\n\\item[Why are all the gradients coming out of the automatic\n  differentiation zero?] You have almost certainly omitted or\n  misplaced the call of the \\code{adept::Stack} member function\n  ``\\code{new\\_recording()}''. It should be placed \\emph{after} the\n  independent variables in the algorithm have been initialized, but\n  before any subsequent calculations are performed on these\n  variables. 
If it is omitted or placed before the point where the\n  independent variables are initialized, the differential statements\n  corresponding to this initialization (which are all of the form\n  $\\delta x=0$), will be placed in the list of differential statements\n  and will unhelpfully set to zero all your gradients right at the\n  start of a forward pass (resulting from a call to \\code{forward()})\n  or set them to zero right at the end of a reverse pass (resulting\n  from a call to \\code{reverse()}).\n\\item[Why are the gradients coming out of the automatic\n  differentiation NaN or Inf (even though the value is correct)?] This\n  can occur if the algorithm contains operations for which the\n  derivative is not finite (e.g.\\ $\\sqrt{a}$ for $a=0$), or\n  constructions where a non-finite value is produced but subsequently\n  made finite (e.g.\\ $\\exp(-1.0/a)$ for $a=0$). Usually the algorithm\n  can be recoded to avoid these problems, if the location of the\n  problematic operations can be identified. The simplest way to locate\n  the offending statement is to recompile your code with the \\code{-g}\n  option and the \\code{ADEPT\\_TRACK\\_NON\\_FINITE\\_GRADIENTS}\n  preprocessor variable set (see section\n  \\ref{sec:configuring_no_lib}). Run the program within a debugger and\n  a \\code{non\\_finite\\_gradient} exception will be thrown, which if\n  not caught within the program will enable you to locate the line in\n  your code where the problem originated.  You may need to turn\n  optimizations off (compile with \\code{-O0}) for the line\n  identification to be accurate. You can also turn on trapping of\n  floating-point exceptions, as explained in the description of the\n  \\code{ADEPT\\_INIT\\_REAL\\_SNAN} option in section\n  \\ref{sec:configuring_no_lib}.\n\\item[Why are the gradients coming out of the automatic\n  differentiation wrong?] 
Before suspecting a bug in \\Adept, note that\n  round-off error can lead to incorrect gradients even in hand-coded\n  differential code. Consider the following:\n\\begin{lstlisting}\n int main() {\n   Stack stack;\n   adouble a = 1.0e-26, b;\n   stack.new_recording();\n   b = sin(a) / a;\n   b.set_gradient(1.0);\n   stack.compute_adjoint();\n   std::cout << \"a=\" << a << \", b=\" << b << \", db/da=\" << a.get_gradient() << \"\\n\";\n }\n\\end{lstlisting}\n  We know that near \\code{a=0} we should have \\code{b=1} and the\n  gradient should be \\code{0}.  But running the program above will\n  give a gradient of \\code{1.71799e+10}. If you hand-code the\n  gradient, i.e.\n\\begin{lstlisting}\n double A = 1.0e-26;\n double dB_dA = cos(A)/A - sin(A) / (A*A);\n\\end{lstlisting}\n  you will also get the wrong gradient.  You can see that the\n  answer is the difference of two very large numbers and so subject to\n  round-off error.  This example is therefore not a bug of \\Adept, but\n  a limitation of finite-precision machines.  To check this, try\n  compiling your code using either the ADOL-C or CppAD automatic\n  differentiation tools; I have always found these tools to give\n  exactly the same gradient as \\Adept. Unfortunately, round-off error\n  can build up over many operations to give the wrong result, so there\n  may not be a simple solution in your case.\n\\item[Can \\Adept\\ reuse a stored tape for multiple runs of the same\n  algorithm but with different inputs?] No. \\Adept\\ does not store the\n  full algorithm in its stack (as ADOL-C does in its tapes, for\n  example), only the derivative information.  So from the stack alone\n  you cannot rerun the function with different inputs.  However,\n  rerunning the algorithm including recomputing the derivative\n  information is fast using \\Adept, and is still faster than libraries\n  that store enough information in their tapes to enable a tape to be\n  reused with different inputs.  
It should be stressed that for any\n  algorithm that includes different paths of execution (``if''\n  statements) based on the values of the inputs, such a tape would\n  need to be rerecorded anyway. This includes any algorithm containing\n  a look-up table.\n\\item[Why does my code crash with a segmentation fault?] This means it\n  is trying to access a memory address not belonging to your program,\n  and the first thing to do is to run your program in a debugger to\n  find out at what point in your code this occurs. If it is in the\n  \\code{adept::aReal} constructor (note that \\code{aReal} is synonymous with\n  \\code{adouble}), then it is very likely that you have tried to\n  initiate an \\code{adept::adouble} object before initiating an\n  \\code{adept::Stack} object. As described in section\n  \\ref{sec:stack_setup}, there are good reasons why you need to\n  initialize the \\code{adept::Stack} object first.\n\\item[How can I interface \\Adept\\ with a matrix library such as\n  Eigen?]  Unfortunately the use of expression templates in\n  \\Adept\\ means that it does not work optimally (if it works at all)\n  with third-party matrix libraries that use expression\n  templates. This is the reason why Adept 2.0 combines array\n  functionality with automatic differentiation in a single\n  expression-template framework.\n\\item[Do you have plans to enable \\Adept\\ to produce Hessian\n  matrices?]  Not in the near future as this is a huge change.\n  However, if your objective function $J(\\x)$ (also known as a cost\n  function or penalty function) has a specific form then the\n  approximate Hessian matrix can be computed from the Jacobian matrix,\n  as described in chapter \\ref{chap:optimize}.\n\\item[Why doesn't the ternary operator work?] Some compilers will fail\n  to compile the following function:\n\\begin{lstlisting}\n adept::adouble piecewise(adept::adouble x) {\n   return x < 1.0 ? 
x*x : 2.0*x-1.0;\n }\n\\end{lstlisting}%\nThe reason is that these compilers require that the two possible\noutcomes of the ternary operator have the same type, but due to the\nuse of expression templates, the types of these mathematical\nexpressions are actually different.  The ternary operator cannot be\noverloaded to allow such arguments. The solution is to explicitly\nconvert the outcomes to \\code{adouble}:\n\\begin{lstlisting}\n adept::adouble piecewise(adept::adouble x) {\n   return x < 1.0 ? adept::adouble(x*x) : adept::adouble(2.0*x-1.0);\n }\n\\end{lstlisting}\n\\item[Why is my executable so huge?]  Probably you are including\n  debugging symbols by compiling with the \\code{-g} option. Expression\n  templates need long strings to describe them, so this extra content\n  can increase the size of object files and executables by a factor of\n  ten.  This does not slow down execution, but for production code you\n  may wish to compile without debugging symbols, or if you use the GNU\n  compiler use instead the \\code{-g1} option which stores a reduced\n  amount of debugging information.\n\\item[Why do I get incorrect behaviour when I use the ``\\code{auto}''\n  keyword?] Since C++11, many programmers make widespread use of\n  \\code{auto} as the type of a local object that can be inferred from\n  its initializer. This is problematic for most expression-template\n  libraries, including \\Adept, because expressions are not evaluated\n  immediately.  For example, dividing one \\code{aReal} object by\n  another returns an\n  \\code{adept::internal::BinaryOperation<Real,aReal,Divide,aReal>}\n  object, and the division is only performed (and differentiated) when\n  this object is assigned to an \\code{aReal} object. 
The \\code{auto}\n  keyword will be interpreted as the type of the internal object, but\n  this internal object may contain references to temporary objects\n  that make up the other parts of the expression, and which go out of\n  scope after the semi-colon at the end of the \\code{auto} statement.\n  In this example the correct behaviour is obtained by replacing\n  \\code{auto} with \\code{aReal}.  Never use the \\code{auto} keyword\n  when initializing an object from an \\Adept\\ expression.\n\\end{description}\n\\section{Copyright and license for \\Adept\\ software}\n\\label{sec:license}\nVersions 1.9 of \\Adept\\ and later are owned and copyrighted jointly by\nthe University of Reading and the European Centre for Medium Range\nWeather Forecasts. The copyright to versions 1.1 and earlier is held\nsolely by the University of Reading.\n\nSince version 1.1, the \\Adept\\ library is released under the Apache\nLicense, Version 2.0, which is available at\n\\url{http://www.apache.org/licenses/LICENSE-2.0}.  In short, this\nfree-software license permits you to use the library for any purpose,\nand to modify it and combine it with other software to form a larger\nwork.  If you choose, you may release the modified software in either\nsource code or object code form, so you may use \\Adept\\ in both\nopen-source software and non-free proprietary software. However,\ndistributed versions must retain copyright notices and also distribute\nboth the information in the NOTICES file and a copy of the Apache\nLicense.  Different license terms may be applied to your distributed\nsoftware, although they must include the conditions on redistribution\nprovided in the Apache License.  
This is just a short summary; if in\ndoubt, consult the text of the license.\n\nIn addition to the legally binding terms of the license, it is\n\\emph{requested} that:\n\\begin{itemize}\n\\item You cite \\cite{Hogan2014} in publications describing algorithms\n  and software that make use of the \\Adept\\ library. While not a\n  condition of the license, this is good honest practice in science\n  and engineering.\n\\item If you make modifications to the \\Adept\\ library that might be\n  useful to others, you release your modifications under the terms of\n  the Apache License, Version 2.0, so that they are available to\n  others and could also be merged into a future official version of\n  \\Adept. If you do not state the license applied to your\n  modifications then by default they will be under the terms of the\n  Apache License. You will retain copyright of your modifications, but\n  if your modifications are written in the course of employment then\n  under almost all circumstances (including employment by a\n  University) it is your employer who holds the copyright.  Therefore\n  you should obtain permission from them to release your modifications\n  under the Apache License.\n\\end{itemize}\n\nNote that other source files in the \\Adept\\ package used for\ndemonstrating and benchmarking \\Adept\\ are released under the GNU\nall-permissive license\\footnote{The GNU all-permissive license reads:\n  \\emph{Copying and distribution of this file, with or without\n    modification, are permitted in any medium without royalty provided\n    the copyright notice and this notice are preserved.  This file is\n    offered as-is, without any warranty.}}, which is specified at the\ntop of all files it applies to.\n\n\\Adept\\ version 1.0 was released under the terms of the GNU General\nPublic License (GPL) and so could not be released as part of a larger\nwork unless the entire work was released under the conditions of the\nGPL.  
It is hoped that the switch to the Apache License will\nfacilitate wider use of \\Adept.\n\n\\section*{Acknowledgments}\nAdept 1.0 was developed by Robin Hogan at the University of Reading\nwith funding from European Space Agency contract\n40001041528/11/NL/CT. Some of the modifications to produce version 1.1\nwere funded by a National Centre for Earth Observation Mission Support\ngrant (Natural Environment Research Council grant NE/H003894/1). Dr\nBrian Tse is thanked for his work exploring different parallelization\nstrategies during this period. Subsequent development has been carried\nout under employment at the European Centre for Medium Range Weather\nForecasts.\n\n\\begin{thebibliography}{00}\n\\markright{References}\n\\harvarditem{Bell}{2007}{Bell2007}Bell, B., 2007: CppAD: A package for C++\nalgorithmic differentiation. \\url{http://www.coin-or.org/CppAD}\n% \n\\harvarditem{Liu and Nocedal}{1989}{Liu+1989}Liu, D. C., and Nocedal,\n  J., 1989: On the limited memory BFGS method for large scale\noptimization. \\emph{Math.\\ Programming B,} {\\bf 45,} 503--528.\n%\n\\harvarditem{Gay}{2005}{Gay2005}Gay, D. M., 2005: Semiautomatic\ndifferentiation for efficient gradient computations.  In\n\\emph{Automatic Differentiation: Applications, Theory, and\n  Implementations}, H. M. B\\\"ucker, G. F. Corliss, P.  Hovland,\nU. Naumann and B. Norris (eds.), Springer, 147--158.\n%\n\\harvarditem{Griewank et~al.}{1996}{Griewank+1996}Griewank, A.,\n  Juedes, D., and Utke, J., 1996:  Algorithm 755: ADOL-C: a package for the\nautomatic differentiation of algorithms written in C/C++. \\textit{ACM\n  Trans.\\ Math.\\ Softw.,} \\textbf{22,} 131--167.\n\\harvarditem{Hogan}{2014}{Hogan2014}Hogan, R. J., 2014: Fast reverse-mode\n  automatic differentiation using expression templates in\n  C++. \\textit{ACM Trans.\\ Math.\\ Softw.,} \\textbf{40,} 26:1--26:16.\n\\harvarditem{Veldhuizen}{1995}{Veldhuizen1995}Veldhuizen, T., 1995:\nExpression templates. 
{\\it C++ Report,} {\\bf 7,} 26--31.\n\\end{thebibliography}\n\n\\end{document}\n"
  },
  {
    "path": "doc/adept_reference.tex",
    "content": "\\documentclass[10pt,a4,landscape]{article}\n% Page set up\n\\setlength{\\oddsidemargin}{-1cm} %{0.5cm}\n\\setlength{\\evensidemargin}{-1cm} %{0.5cm}\n\\setlength{\\topmargin}{-3cm}\n%\\setlength{\\topmargin}{0cm}\n%\\setlength{\\textheight}{24cm}\n%\\setlength{\\textwidth}{16cm}\n\\setlength{\\textheight}{19cm}\n\\setlength{\\textwidth}{26cm}\n\\setlength{\\marginparsep}{0.5cm}\n\\setlength{\\marginparwidth}{0cm}\n%\\setlength{\\parindent}{1em}\n%\\setlength{\\parskip}{0.5ex}\n\\def\\myvskip{\\vskip 1ex}\n\\def\\hangingpar{\\parshape 2 0cm \\linewidth 1ex \\dimexpr\\linewidth-1ex\\relax}\n\\renewcommand{\\baselinestretch}{1.05}\n\\sloppy\n%\\usepackage{multicol}\n\\usepackage{lmodern}\\usepackage[T1]{fontenc}\n\\usepackage{color}\n\\usepackage[figuresright]{rotating}\n\\DeclareFontFamily{T1}{lmttc}{\\hyphenchar \\font-1 }\n\\DeclareFontShape{T1}{lmttc}{m}{n}\n     {<-> ec-lmtlc10}{}\n\\DeclareFontShape{T1}{lmttc}{m}{it}\n     {<->sub*lmttc/m/sl}{}\n\\DeclareFontShape{T1}{lmttc}{m}{sl}\n     {<-> ec-lmtlco10}{}\n%\\def\\myfont{\\fontfamily{cmss}\\fontseries{lmtt}\\selectfont}\n\\def\\myfont{\\fontfamily{cmss}\\selectfont}\n\\def\\mysize{\\footnotesize}\n\\def\\mysize{\\small}\n\\def\\codeindent{\\hspace{\\tabcolsep}}\n\\setlength{\\parindent}{0pt}\n\\def\\code#1{\\texttt{#1}}\n\\renewcommand{\\rmdefault}{cmss}\n\\begin{document}\n\\pagestyle{empty}\n\\twocolumn\n\\mysize\\myfont\\section*{\\Huge Adept Quick Reference}\n%\\section*{General}\nAll functions and types are placed in the \\code{adept} namespace.\n\\subsection*{Header files}\n\\begin{tabular}{ll}\n\\code{adept.h} & Include if only scalar automatic differentiation is required\\\\\n\\code{adept\\_arrays.h} & Include if array capabilities are needed as well\\\\\n\\code{adept\\_fortran.h} & Interface to Fortran 2018 array descriptors\\\\\n\\code{adept\\_optimize.h} & Minimization algorithms, e.g.\\ Levenberg-Marquardt\\\\\n\\code{adept\\_source.h} & Include entire Adept library, so linking to 
library not required \\\\\n\\end{tabular}\n\n%\\section*{Automatic differentiation functionality}\n\\subsection*{Scalar types}\n\\begin{tabular}{ll}\n\\code{Real} & Passive scalar type used for differentiation (usually\n\\code{double})\\\\\n\\code{aReal} & Active scalar of underlying type \\code{Real} \\\\\n\\code{adouble}, \\code{afloat} & Active scalars of underlying type\n\\code{double} and \\code{float}\\\\\n\\end{tabular}\n\\subsection*{Basic reverse-mode workflow}\n\\begin{tabular}{ll}\n\\code{Stack stack;} & Object to store derivative information\\\\\n\\code{aVector x = \\{1.0, 2.0\\};} & Initialize independent (input) variables (C++11)\\\\\n\\code{stack.new\\_recording();} & Start a new recording\\\\\n\\code{aReal J = algorithm(x);} & Any complicated algorithm here\\\\\n\\code{J.set\\_gradient(1.0);} & Seed adjoint of cost function\\\\\n\\code{stack.reverse();} & Perform reverse-mode differentiation\\\\\n\\code{Vector dJ\\_dx = x.get\\_gradient();} & Return gradients of output with respect to inputs\\\\\n\\end{tabular}\n\n\n\\subsection*{Basic Jacobian workflow}\n\\begin{tabular}{ll}\n\\code{Stack stack;} & Object to store derivative information\\\\\n\\code{aVector x = \\{1.0, 2.0\\};} & Initialize independent (input) variables (C++11)\\\\\n\\code{stack.new\\_recording();} & Start a new recording\\\\\n\\code{aVector y = algorithm(x);} & Algorithm with vector output\\\\\n\\code{stack.independent(x);} & Declare independent variables \\\\\n\\code{stack.dependent(y);} & Declare dependent variables\\\\\n\\code{Matrix dy\\_dx = stack.jacobian();} & Compute Jacobian matrix\\\\\n\\end{tabular}\n\\subsection*{\\code{aReal} member functions}\nThe first three functions below also work with active array arguments, where\n\\code{g} would be of the equivalent passive array type:\\\\\n\\begin{tabular}{ll}\n\\code{.set\\_gradient(g)} & Initialize gradient to \\code{g} \\\\\n\\code{.get\\_gradient()} & After forward or reverse pass, return 
gradient\\\\\n\\code{.get\\_gradient(g)} & As above, but writing gradient to \\code{g}\\\\\n\\code{.add\\_derivative\\_dependence(a,p)} & Add \\code{p}$\\times\\delta$\\code{a} to the stack\\\\\n\\code{.append\\_derivative\\_dependence(a,p)} & Append $+$\\code{p}$\\times\\delta$\\code{a} to the stack\\\\\n\\end{tabular}\n\n\\subsection*{\\code{Stack} member functions}\nConstructors:\\\\\n\\begin{tabular}{ll}\n\\code{Stack stack;} & Construct and activate immediately \\\\\n\\code{Stack stack(false);} & Construct in inactive state\\\\\n\\end{tabular}\n\nMember functions:\\\\\n\\begin{tabular}{ll}\n\\code{.new\\_recording()} & Clear any existing differential statements\\\\\n\\code{.pause\\_recording()} & Pause recording (\\code{ADEPT\\_PAUSABLE\\_RECORDING} needed)\\\\\n\\code{.continue\\_recording()} & Continue recording \\\\\n\\code{.is\\_recording()} & Is Adept currently recording?\\\\\n\\code{.forward()} & Perform forward-mode differentiation\\\\\n\\code{.compute\\_tangent\\_linear()} & ...as above\\\\\n\\code{.reverse()} & Perform reverse-mode differentiation\\\\\n\\code{.compute\\_adjoint()} & ...as above\\\\\n\\code{.independent(x)} & Declare an independent variable (active scalar or array)\\\\\n\\code{.independent(xptr,n)} & Declare \\code{n} independent scalar variables starting at \\code{xptr} \\\\\n\\code{.dependent(y)} & Declare a dependent variable (active scalar or array)\\\\\n\\code{.dependent(yptr,n)} & Declare \\code{n} dependent scalar variables starting at \\code{yptr}\\\\\n\\code{.jacobian()} & Return Jacobian matrix\\\\\n\\code{.jacobian(jacptr)} & Place Jacobian matrix into \\code{jacptr} (column major)\\\\\n\\code{.jacobian(jacptr,false)} & Place Jacobian matrix into \\code{jacptr} (row major)\\\\\n\\code{.clear\\_gradients()} & Clear gradients set with \\code{set\\_gradient} function \\\\\n\\code{.clear\\_independents()} & Clear independent variables\\\\\n\\code{.clear\\_dependents()} & Clear dependent variables\\\\\n\\code{.n\\_independents()} 
& Number of independent variables declared \\\\\n\\code{.n\\_dependents()} & Number of dependent variables declared\\\\\n%\\end{tabular}\n%\\begin{tabular}{ll}\n\\code{.print\\_status()} & Print status of \\code{Stack} to standard output\\\\\n\\code{.print\\_statements()} & Print list of differential statements\\\\\n\\code{.print\\_gradients()} & Print current values of gradients\\\\\n\\code{.activate()} & Activate the stack \\\\\n\\code{.deactivate()} & Deactivate the stack\\\\\n\\code{.is\\_active()} & Is the stack currently active?\\\\\n\\code{.memory()} & Return number of bytes currently used\\\\\n\\code{.preallocate\\_statements(n)} & Preallocate space for \\code{n} statements\\\\\n\\code{.preallocate\\_operations(n)} & Preallocate space for \\code{n} operations\\\\\n\\end{tabular}\n\n\\subsection*{Query functions in \\code{adept} namespace}\n\\begin{tabular}{ll}\n\\code{active\\_stack()} & Return pointer to currently active \\code{Stack} object\\\\\n\\code{version()} & Return \\code{std::string} with Adept version number\\\\\n\\code{configuration()} & Return \\code{std::string} describing Adept configuration\\\\\n\\code{have\\_matrix\\_multiplication()} & Adept compiled with matrix multiplication (BLAS)?\\\\\n\\code{have\\_linear\\_algebra()} & Adept compiled with linear-algebra (LAPACK)?\\\\\n\\code{set\\_max\\_blas\\_threads(n)} & Set maximum threads for matrix operations\\\\\n\\code{max\\_blas\\_threads()} & Get maximum threads for matrix operations\\\\\n\\code{is\\_thread\\_unsafe()} & Global \\code{Stack} object is \\textit{not} thread-local?\\\\\n\\end{tabular}\n\\newpage\n%\\section*{Array functionality}\n\\subsection*{Dense dynamic array types}\n\\begin{tabular}{ll}\n\\code{Vector}, \\code{Matrix}, \\code{Array3D}, \\code{Array4D}... \\code{Array7D} & Arrays of type \\code{Real}\\\\\n\\code{intVector}, \\code{intMatrix}, \\code{intArray3D}...  
\\code{intArray7D}& Arrays of type \\code{int}\\\\\n\\code{boolVector}, \\code{boolMatrix}, \\code{boolArray3D}...  \\code{boolArray7D}& Arrays of type \\code{bool}\\\\\n\\code{floatVector}, \\code{floatMatrix}, \\code{floatArray3D}... \\code{floatArray7D} & Arrays of type \\code{float}\\\\\n\\code{aVector}, \\code{aMatrix}, \\code{aArray3D}... \\code{aArray7D} & Active arrays of type \\code{Real}\\\\\n\\end{tabular}\n\\myvskip\nDefine new dynamic array types as follows:\\\\\n\\begin{tabular}{l}\n\\code{typedef Array<short,2,false> shortMatrix;}\\\\\n\\code{typedef Array<float,3,true> afloatArray3D;}\n\\end{tabular}\n\n\\subsection*{Dense fixed-size array types}\n\\begin{tabular}{ll}\n\\code{Vector2}, \\code{Vector3}, \\code{Vector4} & Passive vectors of fixed length 2--4\\\\ \n\\code{Matrix22}, \\code{Matrix33}, \\code{Matrix44} & Passive matrices of fixed size 2$\\times$2, 3$\\times$3, 4$\\times$4\\\\\n\\code{aVector2}, \\code{aVector3}, \\code{aVector4} & Active vectors of fixed length 2--4\\\\ \n\\code{aMatrix22}, \\code{aMatrix33}, \\code{aMatrix44} & Active matrices of fixed size 2$\\times$2, 3$\\times$3, 4$\\times$4\\\\\n\\end{tabular}\n\\myvskip\nDefine new fixed array types as follows:\\\\\n\\begin{tabular}{l}\n\\code{typedef FixedArray<short,false,2,4> shortMatrix24;}\\\\\n\\code{typedef FixedArray<Real,true,3,3,3> aArray333;}\n\\end{tabular}\n\\subsection*{Special square matrix types}\n\\begin{tabular}{ll}\n\\code{SymmMatrix}, \\code{aSymmMatrix} & Symmetric matrix\\\\\n\\code{DiagMatrix}, \\code{aDiagMatrix} & Diagonal matrix\\\\\n\\code{TridiagMatrix}, \\code{aTridiagMatrix} & Tridiagonal matrix\\\\\n\\code{PentadiagMatrix}, \\code{aPentadiagMatrix} & Pentadiagonal matrix\\\\\n\\code{LowerMatrix}, \\code{aLowerMatrix} & Lower-triangular matrix\\\\\n\\code{UpperMatrix}, \\code{aUpperMatrix} & Upper-triangular matrix\\\\\n\\end{tabular}\n\\subsection*{Dense dynamic array constructors}\n\\begin{tabular}{ll}\n\\code{Matrix M;} & Create an empty matrix of 
type \\code{Real}\\\\\n\\code{Matrix N(M);} & Create matrix sharing data with existing matrix\\\\\n\\code{Matrix N = M;} & ...as above\\\\\n\\code{Matrix N(3,4);} & Create matrix with size 3$\\times$4\\\\\n\\code{Matrix N(dimensions(3,4));} & ...as above\\\\\n\\code{Matrix N(M.dimensions());} & Create matrix with the same size as \\code{M}\\\\\n\\code{Matrix N(ptr,dimensions(3,4));} & Create 3$\\times$4 matrix sharing data from pointer \\code{ptr}\\\\\n\\code{Matrix N = log(M);} & Create matrix containing copy of right-hand-side\\\\\n\\code{Matrix N = \\{\\{1.0,2.0\\},\\{3.0,4.0\\}\\};} & Create 2$\\times$2 matrix from initializer list (C++11)\\\\\n\\end{tabular}\n\\subsection*{Array resize and link member functions}\n\\begin{tabular}{ll}\n\\code{.clear()} & Return array to original empty state\\\\\n\\code{.resize(3,4)} & Resize array discarding data\\\\\n\\code{.resize(dimensions(3,4))} & ...as above\\\\\n\\code{.resize\\_row\\_major(3,4)} & Resize with row-major storage (default)\\\\\n\\code{.resize\\_column\\_major(3,4)} & Resize with column-major storage\\\\\n\\code{.resize(M.dimensions())} & Resize to same as \\code{M}\\\\\n\\code{.resize\\_contiguous(...)} & Resize guaranteeing contiguous storage\\\\\n\\code{N >{}>= M;} & Discard existing data and link to array on right-hand-side\\\\\n\\end{tabular}\n\\subsection*{Array query member functions}\n\\begin{tabular}{ll}\n\\code{::rank} & Number of array dimensions\\\\\n\\code{.empty()} & Return \\code{true} if array is empty, \\code{false} otherwise\\\\\n\\code{.dimensions()} & Return an object that can be used to resize other arrays\\\\\n\\code{.dimension(i)} & Return length of dimension \\code{i} (0 based)\\\\\n\\code{.size()} & Return total number of elements\\\\\n\\code{.data()} & Return pointer to underlying passive data\\\\\n\\code{.const\\_data()} & Return \\code{const} pointer to underlying data\\\\\n\\end{tabular}\n\\subsection*{Array filling}\n\\begin{tabular}{ll}\n\\code{M = 1.0;} & Fill all elements of 
array with the same number\\\\\n\\code{M <{}< 1.0, 2.0, 3.0, 4.0;} & Fill first four elements of array\\\\\n\\code{M = \\{\\{1.0,2.0\\},\\{3.0,4.0\\}\\};} & Fill 2$\\times$2 matrix (C++11)\\\\\n\\end{tabular}\n\\subsection*{Array indexing and slicing}\nDense arrays can be indexed/sliced using the function-call operator\nwith as many arguments as there are dimensions (e.g.\\ index a matrix\nwith \\code{M(i,j)}). In all cases a slice can be used as an lvalue or\nrvalue. If all arguments are scalars then a single element of the\narray is extracted. The following special values are available:\\\\\n\\begin{tabular}{ll}\n\\code{end} & The last element of the dimension being indexed\\\\\n\\code{end-1} & Penultimate element of indexed dimension (any integer arithmetic possible)\\\\\n\\end{tabular}\n\nIf one or more of the arguments is a \\textit{regular index range} then the return\ntype will be an \\code{Array} pointing to part of the original\narray. For every scalar argument, its rank will be reduced by one\ncompared to the original array. The available ranges are:\\\\\n\\begin{tabular}{ll}\n\\code{\\_\\_} & All elements of indexed dimension \\\\\n\\code{range(ibeg,iend)} & Contiguous range from \\code{ibeg} to \\code{iend}\\\\\n\\code{stride(ibeg,iend,istride)} & Strided range (\\code{istride} can be negative but not zero)\\\\\n\\end{tabular}\n\nIf any of the arguments is an \\textit{irregular index range} (such as\nan \\code{intVector} containing an arbitrary list of indices) then the\nreturn type will be an \\code{IndexedArray}. 
If used as an lvalue, it\nwill modify the original array, but if passed into a function\nreceiving an \\code{Array} type then any modifications inside the\nfunction will not affect the original array.\n\\subsection*{Passing arrays to and from functions}\nThere are three ways an array can be received as an argument to a function:\\\\\n\\begin{tabular}{ll}\n\\code{Matrix\\&} & For an array that might be resized in the function\\\\\n\\code{Matrix} & For an array or array slice to be modified inside the function\\\\\n\\code{const Matrix\\&} & For a read-only array, array slice or array expression\\\\\n\\end{tabular}\n\n\\subsection*{Member functions returning lvalue}\nThe functions in this section return an \\code{Array} that links to the\noriginal data and can be used on the left- or right-hand-side of an\nassignment. The following only work on dynamic or fixed-size dense\narrays:\\\\\n\\begin{tabular}{ll}\n\\code{.subset(ibeg0,iend0,ibeg1,iend1,...)} & Contiguous subset\\\\\n\\code{.permute(i0,i1,...)} & Permute dimensions\\\\\n\\code{.diag\\_matrix()} & For vector, return \\code{DiagMatrix}\\\\\n\\code{.soft\\_link()} \\\\\n\\end{tabular}\n\nThe following works on any matrix:\\\\\n\\begin{tabular}{ll}\n\\code{.T()} & Transpose of matrix\\\\\n\\end{tabular}\n\nThe following work only with square matrices, including special square\nmatrices\\\\\n\\begin{tabular}{ll}\n\\code{.diag\\_vector()} & Return vector linked to its diagonals\\\\\n\\code{.diag\\_vector(i)} & Return vector linked to offdiagonal \\code{i}\\\\\n\\code{.submatrix\\_on\\_diagonal(ibeg,iend)} & Return square matrix lying on diagonal\\\\\n\\end{tabular}\n\\subsection*{Elemental mathematical functions}\nReturn passive part of active object: \\code{value(x)}\n\n\\hangingpar\nBinary operators: \\code{+}, \\code{-},\n  \\code{*} and \\code{/}.\n\n\\hangingpar\nAssignment operators:  \\code{+=}, \\code{-=}, \\code{*=} and \\code{/=}.\n\n\\hangingpar\nUnary functions: \\code{sqrt}, \\code{exp},\n  
\\code{log}, \\code{log10}, \\code{sin}, \\code{cos}, \\code{tan},\n  \\code{asin}, \\code{acos}, \\code{atan}, \\code{sinh}, \\code{cosh},\n  \\code{tanh}, \\code{abs}, \\code{asinh}, \\code{acosh}, \\code{atanh},\n  \\code{expm1}, \\code{log1p}, \\code{cbrt}, \\code{erf}, \\code{erfc},\n  \\code{exp2}, \\code{log2}, \\code{round}, \\code{trunc}, \\code{rint},\n  \\code{nearbyint} and \\code{fastexp}.\n\n\\hangingpar\nBinary functions: \\code{pow}, \\code{atan2}, \\code{min},\n  \\code{max}, \\code{fmin} and \\code{fmax}.\n\n\\hangingpar\nUnary functions returning \\code{bool} expressions: \\code{isfinite},\n\\code{isinf} and \\code{isnan}.\n\n\\hangingpar\nBinary operators returning \\code{bool} expressions: \\code{==},\n\\code{!=}, \\code{>}, \\code{<}, \\code{>=} and \\code{<=}.\n\n\\subsection*{Alias-related functions}\n\\begin{tabular}{ll}\n\\code{eval(E)} & Avoid aliasing by evaluating expression \\code{E} into an array\\\\\n\\code{noalias(E)} & Turn off alias checking for expression \\code{E}\\\\\n\\end{tabular}\n\\subsection*{Reduction functions}\n\\begin{tabular}{ll}\n\\code{sum(M)} & Return the sum of all elements in \\code{M}\\\\\n\\code{sum(M,i)} & Return array of rank one less than \\code{M} containing sum along \\code{i}th dimension (0 based)\\\\\n\\end{tabular}\n\n\\hangingpar Other reduction functions working in the same way:\n\\code{mean}, \\code{product}, \\code{minval}, \\code{maxval}, \\code{norm2}.\n\n\\begin{tabular}{ll}\n\\code{dot\\_product(x,y)} & The same as \\code{sum(x*y)} for rank-1\narguments\\\\\n\\end{tabular}\n\\subsection*{Expansion functions}\n\\begin{tabular}{ll}\n\\code{spread<d>(M,n)} & Replicate \\code{M} array expression \\code{n}\ntimes along dimension \\code{d}\\\\\n\\code{outer\\_product(x,y)} & Return rank-2 outer product from two\nrank-1 arguments\\\\\n\\end{tabular}\n\\subsection*{Matrix multiplication and linear algebra}\n\\begin{tabular}{ll}\n\\code{transpose(M)} & Transpose matrix or 2D matrix 
expression\\\\\n\\code{matmul(M,N)} & Matrix multiply, where at least one argument must\nbe a matrix, and \\\\\n&orientation of any vector arguments is inferred\\\\\n\\code{M ** N} & Shortcut for \\code{matmul}; precedence is the same as normal\n  multiply\\\\\n\\code{inv(M)} & Inverse of square matrix\\\\\n\\code{solve(A,x)} & Solve system of linear equations\\\\ \n\\end{tabular}\n\n\\subsection*{Preprocessor variables}\nThe following can be defined to change the behaviour of your code:\\\\\n\\begin{tabular}{ll}\n\\code{ADEPT\\_STACK\\_THREAD\\_UNSAFE} & Thread-unsafe \\code{Stack} (faster)\\\\\n\\code{ADEPT\\_RECORDING\\_PAUSABLE} & Recording can be paused (slower)\\\\\n\\code{ADEPT\\_NO\\_AUTOMATIC\\_DIFFERENTIATION} & Turn off differentiation\\\\\n\\code{ADEPT\\_TRACK\\_NON\\_FINITE\\_GRADIENTS} & Exception thrown if derivative non-finite\\\\\n\\code{ADEPT\\_BOUNDS\\_CHECKING} & Check array bounds (slower)\\\\\n\\code{ADEPT\\_NO\\_ALIAS\\_CHECKING} & Turn off alias checking (faster)\\\\\n\\code{ADEPT\\_NO\\_DIMENSION\\_CHECKING} & Turn off dimension checking (faster)\\\\\n\\code{ADEPT\\_INIT\\_REAL\\_SNAN} & Initialize real numbers to signaling NaN\\\\\n\\code{ADEPT\\_INIT\\_REAL\\_ZERO} & Initialize real numbers to zero\\\\\n\\code{ADEPT\\_FAST\\_EXPONENTIAL} & Use faster vectorizable exponential\\\\\n\\code{ADEPT\\_FAST\\_SCALAR\\_EXPONENTIAL} & Provide faster \\code{adept::exp} for scalars\\\\\n\\code{ADEPT\\_FAST} & Enable bit-reproducible options\\\\\n\\code{ADEPT\\_STORAGE\\_THREAD\\_SAFE} & Thread-safe array storage (slower)\\\\\n\\code{ADEPT\\_SUPPORT\\_HUGE\\_ARRAYS} & Use \\code{std::size\\_t} for array dimensions\\\\\n\\code{ADEPT\\_REAL\\_TYPE\\_SIZE} & Size of \\code{Real}: 4 or 8 (default 8)\n\\end{tabular}\nThe \\code{ADEPT\\_VERSION} variable contains version number as an\ninteger, e.g.\\ \\code{20108}, while \\code{ADEPT\\_VERSION\\_STR} contains\nit as a string, e.g.\\ 
``2.0.8''.\n\\onecolumn\n\n\\newpage\n\n\\def\\Y{\\textbf{Y}}\n\\def\\r#1{\\rotatebox{90}{#1}}\n\n\\setlength{\\topmargin}{-3cm}\n\\begin{table}[tb!]\n%\\caption{\n\\begin{center}\n%\\parbox{0.9\\columnwidth}{\n\\mysize\\myfont Comparison of array syntax between\n  Fortran 90 (and later), Matlab and the C++ libraries Adept and Eigen\n%In these examples, \\code{v} and \\code{w} are vectors\n%  and \\code{A} and \\code{B} are matrices.\n%}\n\n  \\footnotesize\n  \\myfont\n\\begin{tabular}{lllll}\n\\hline\n{\\large\\phantom{X}}\n& \\mysize Fortran 90+ & \\mysize Matlab & \\mysize C++ Adept (with C++11 features) & \\mysize C++ Eigen \\\\\n\\hline\nMaximum dimensions &\n7 (15 from Fortran 2008) &\nUnlimited &\n7 &\n2\n\\\\\n\\hline\nVector declaration &\n\\code{real,dimension(:)} &\n&\n\\code{Vector} &\n\\code{VectorXd}\n\\\\\nMatrix declaration &\n\\code{real,dimension(:,:)} &\n&\n\\code{Matrix} &\n\\code{MatrixXd, ArrayXd}\n\\\\\n3D array declaration &\n\\code{real,dimension(:,:,:)}&\n&\n\\code{Array3D}\n\\\\\nFixed matrix declaration &\n\\code{real,dimension(M,N)} &\n&\n\\code{FixedMatrix<double,false,M,N>} &\n\\code{Matrix<double,M,N>}\n\\\\\nDiagonal matrix declaration&\n&\n&\n\\code{DiagMatrix} &\n\\code{DiagonalMatrix<double,Dynamic>}\n\\\\\n%Tridiagonal matrix &\n%&\n%&\n%\\code{TridiagMatrix} &\n%\\\\\nSymmetric matrix decl.&\n&\n&\n\\code{SymmMatrix}\n\\\\\n%Upper-triangular matrix &\n%&\n%&\n%\\code{UpperMatrix} &\n%\\\\\nSparse matrix declaration&\n&\n%\\code{sparse(A)}\n&\n&\n\\code{SparseMatrix<double>}\n\\\\\n\\hline\nGet rank &\n\\code{rank(A)} &\n\\code{ndims(A)} &\n\\code{A::rank}\n\\\\\nGet total size &\n\\code{size(A)} &\n\\code{numel(A)} &\n\\code{A.size()} &\n\\code{A.size()}\n\\\\\nGet size of dimension &\n\\code{size(A,i)} &\n\\code{size(A,i)} &\n\\code{A.size(i)} &\n\\code{A.rows()}, \\code{A.cols()}\n\\\\\nGet all dimensions &\n\\code{shape(A)} &\n\\code{size(A)} &\n\\code{A.dimensions()}\n\\\\\n\\hline\nResize &\n\\code{allocate(A(m,n))} 
&\n\\code{A = zeros(m,n)} &\n\\code{A.resize(m,n)} &\n\\code{A.resize(m,n)} \n\\\\\nClear &\n\\code{deallocate(A)} &\n\\code{A = []} &\n\\code{A.clear()} &\n\\code{A.resize(0,0)}\n\\\\\nLink/associate &\n\\code{A => B} &\n&\n\\code{A >{}>= B} &\n%Low-level access via \\code{Map}\n(Complicated)\n\\\\\n\\hline\nSet elements to constant &\n\\code{A = x} &\n\\code{A(:) = x} &\n\\code{A = x} &\n\\code{A.fill(x)}\n\\\\\nFill vector with data &\n\\code{v = [0,1]} &\n\\code{v = [0,1]} &\n\\code{v <{}< 0,1} &\n\\code{v <{}< 0,1}\n\\\\\nFill matrix with data &\n\\code{A=reshape([0,1,2,3],[2,2])} &\n\\code{A = [1 2; 3 4]} &\n\\code{A <{}< 1,2,3,4} or \\code{A = \\{\\{1,2\\},\\{3,4\\}\\}} &\n\\code{A <{}< 1,2,3,4}\n\\\\\nVector literal &\n\\code{[1.0, 2.0]} &\n\\code{[1.0 2.0]} &\n\\code{Vector\\{1.0, 2.0\\}} &\n\\\\\n\\hline\nVector subset &\n\\code{v(i1:i2)} &\n\\code{v(i1:i2)} &\n\\code{v.subset(i1,i2)} &\n\\code{v.segment(i1,m)}\n%\\code{Map<VectorXd> w(v.data()+1,8)}\n\\\\\nStrided indexing &\n\\code{v(i1:i2:s)} &\n\\code{v(i1:s:i2)} &\n\\code{v(stride(i1,i2,s))} &\n%\\code{Map<VectorXd,0,InnerStride<> > w(v.data()+1,4,InnerStride<2>)}\n(Complicated)\n\\\\\nVector end indexing &\n\\code{v(i:)} &\n\\code{v(i:end)} &\n\\code{v.subset(i,end)} &\n\\code{v.tail(n)}\n\\\\\nIndex relative to end &\n&\n\\code{v(end-1)} &\n\\code{v(end-1)} &\n\\\\\nIndex by int vector &\n\\code{v(index)} &\n\\code{v(index)} &\n\\code{v(index)}\n\\\\\n\\hline\nMatrix subset &\n\\code{A(i1:i2,j1:j2)} &\n\\code{A(i1:i2,j1:j2)} &\n\\code{A.subset(i1,i2,j1,j2)} &\n\\code{A.block(i1,j1,m,n)}\n\\\\\nExtract row &\n\\code{A(i,:)} &\n\\code{A(i,:)} &\n\\code{A(i,\\_\\_)}, \\code{A[i]} &\n\\code{A.row(i)}\n\\\\\nMatrix end block &\n\\code{M(i:,j:)} &\n\\code{M(i:end,j:end)} &\n\\code{M.subset(i,end,j,end)} &\n\\code{M.bottomRightCorner(m,n)}\n\\\\\nDiagonal matrix from vector &\n&\n\\code{diag(v)} &\n\\code{v.diag\\_matrix()} &\n\\code{v.asDiagonal()}\n\\\\\nMatrix diagonals as vector &\n&\n\\code{diag(A)} 
&\n\\code{A.diag\\_vector()} &\n\\code{A.diagonal()} \n\\\\\nMatrix off-diagonals &\n&\n\\code{diag(A,i)} &\n\\code{A.diag\\_vector(i)} &\n\\code{A.diagonal(i)} \n%\\\\\n%Symmetric view &\n%&\n%&\n%\\code{%\\color{red}\n%A.symm\\_matrix<UPPER>()\n%}&\n%\\code{A.selfAdjointView<Upper>()}\n%\\\\\n%Upper-triangular view &\n%&\n%&\n%\\code{\\color{red}A.upper\\_matrix()} &\n%\\code{A.triangularView<Upper>()}\n\\\\\n\\hline\nElementwise multiplication &\n\\code{A * B} & \n\\code{A .* B} &\n\\code{A * B} &\n\\code{A.array() * B.array()}\n\\\\\nElemental function &\n\\code{sqrt(A)} &\n\\code{sqrt(A)} &\n\\code{sqrt(A)} &\n\\code{A.array().sqrt()}\n\\\\\nAddition assignment &\n\\code{A = A + B} &\n\\code{A = A + B} &\n\\code{A += B} &\n\\code{A.array() += B}\n\\\\\nPower &\n\\code{A ** B} &\n\\code{A .\\textasciicircum\\ B} &\n\\code{pow(A,B)} &\n\\code{A.array().pow(B)}\n\\\\\n\\hline\nMatrix multiplication &\n\\code{matmul(A,B)} &\n\\code{A * B} &\n\\code{A ** B} &\n\\code{A * B}\n\\\\\nDot product &\n\\code{dot\\_product(v,w)} &\n\\code{dot(v,w)} &\n\\code{dot\\_product(v,w)} &\n\\code{v.dot(w)}\n\\\\\nMatrix transpose &\n\\code{transpose(A)} &\n\\code{A'} &\n\\code{A.T()} &\n\\code{A.transpose()}\n\\\\\nIn-place transpose &\n&\n&\n\\code{A.in\\_place\\_transpose()} &\n\\code{A.transposeInPlace()}\n\\\\\nMatrix solve &\n&\n\\code{A \\textbackslash\\ b} &\n\\code{solve(A,b)} &\n\\code{A.colPivHouseholderQr().solve(b)}\n\\\\\nMatrix inverse &\n&\n\\code{inv(A)} &\n\\code{inv(A)} &\n\\code{A.inverse()}\n\\\\\n\\hline\n``Find'' conditional assign &\n&\n\\code{v(find(w<0)) = 0} &\n\\code{v(find(w<0)) = 0}\n\\\\\n``Where'' conditional assign &\n\\code{where(w<0) v = 0} &\n&\n\\code{v.where(w<0) = 0} &\n\\code{v = (w<0).select(0,v)}\n\\\\\n``Where'' with both cases &\n\\code{...elsewhere v = 1} &\n&\n\\code{v.where(w<0)=either\\_or(0,1)} &\n\\code{v = (w<0).select(0,1)}\n\\\\\n\\hline\nAverage all elements &\n\\code{mean(A)} & \n\\code{mean(A(:))} &\n\\code{mean(A)} 
&\n\\code{A.mean()}\n\\\\\nAverage along dimension &\n\\code{mean(A,i)} & \n\\code{mean(A,i)} &\n\\code{mean(A,i)} &\n\\code{A.colwise().mean()}\n\\\\\nMaximum of all elements &\n\\code{maxval(A)} &\n\\code{max(A(:))} &\n\\code{maxval(A)} &\n\\code{A.maxCoeff()}\n\\\\\nMaximum of two arrays &\n\\code{max(A,B)} &\n(Complicated) &\n\\code{max(A,B)}, \\code{fmax(A,B)} &\n\\code{A.max(B)}\n\\\\\nSpread along new dimension &\n\\code{spread(A,dim,n)} &\n&\n\\code{spread<dim>(A,n)}\n\\\\\n\\hline\n\\end{tabular}\n\\end{center}\n\\end{table}\n\\end{document}\n"
  },
  {
    "path": "include/Makefile.am",
    "content": "include_HEADERS = adept.h adept_arrays.h adept_optimize.h adept_source.h adept_fortran.h\n\npkginclude_HEADERS = adept/Active.h adept/ActiveReference.h adept/Allocator.h \\\n\tadept/Array.h adept/Expression.h adept/ExpressionSize.h \\\n\tadept/IndexedArray.h adept/matmul.h adept/RangeIndex.h \\\n\tadept/ScratchVector.h adept/SpecialMatrix.h adept/Stack.h \\\n\tadept/StackStorage.h adept/StackStorageOrig.h \\\n\tadept/StackStorageOrigStl.h adept/Statement.h adept/Storage.h \\\n\tadept/array_shortcuts.h adept/base.h adept/reduce.h \\\n\tadept/contiguous_matrix.h adept/exception.h adept/settings.h \\\n\tadept/interp.h adept/ActiveConstReference.h adept/cppblas.h \\\n\tadept/scalar_shortcuts.h adept/solve.h adept/traits.h adept/where.h \\\n\tadept/vector_utilities.h adept/FixedArray.h adept/Packet.h \\\n\tadept/UnaryOperation.h adept/BinaryOperation.h adept/ArrayWrapper.h \\\n\tadept/outer_product.h adept/spread.h adept/inv.h adept/eval.h \\\n\tadept/noalias.h adept/store_transpose.h adept/quick_e.h \\\n\tadept/GradientIndex.h adept/Optimizable.h adept/Minimizer.h\n\nEXTRA_DIST = Timer.h create_adept_source_header\n\nadept_source.h: @top_srcdir@/adept/*.h @top_srcdir@/adept/*.cpp @srcdir@/create_adept_source_header\n\t@srcdir@/create_adept_source_header\nall-local: adept_source.h\n"
  },
  {
    "path": "include/Timer.h",
    "content": "/* Timer.h - Utility class for timing different parts of a program\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#ifndef Timer_H\n#define Timer_H 1\n\n#ifdef _WIN32\n#include <windows.h>\n#include <time.h>\n#else\n#include <sys/time.h>\n#endif\n\n#include <map>\n#include <string>\n#include <sstream>\n#include <vector>\n#include <iostream>\n\n// The Timer class: all functions are inline\nclass Timer {\npublic:\n  typedef int TimerInt;\n\n  // Constructor can specify a number of unnamed activities\n  Timer(TimerInt n_activities = 0) \n    : current_activity_(-1), timer_on_(false), print_on_exit_(false) {\n#ifdef _WIN32\n    win_last_time_.QuadPart = 0;\n#else\n    last_time_.tv_sec = 0;\n    last_time_.tv_usec = 0;\n#endif\n    timings_.reserve(100);\n    names_.reserve(100);\n    for (TimerInt i = 0; i < n_activities; i++) {\n      std::stringstream s;\n      s << \"Activity \" << i;\n      timings_.push_back(0.0);\n      names_.push_back(s.str());\n    }\n  }\n\n  // When the timer is destructed (typically at program exit), print\n  // out the times spent in each activity\n  ~Timer() {\n    if (print_on_exit_) {\n      print();\n    }\n  }\n\n  // Print out the times spent in each activity\n  void print() {\n    double sum = 0.0;\n    std::cerr << size() << \" activities:\\n\";\n    for (TimerInt i = 0; i < size(); i++) {\n      std::cerr.width(10);\n      std::cerr << std::right << timings_[i] << \" s: \" << names_[i] << \"\\n\";\n      sum += timings_[i];\n    }\n    std::cerr.width(10);\n    std::cerr << std::right << sum << \" s: Total\\n\";\n  }\n\n  // Register a new activity with the specified name, returning the\n  // tag to be used to specify it in future, as a TimerInt\n  TimerInt 
new_activity(const std::string& name) {\n    TimerInt tag = size();\n    names_.push_back(name);\n    timings_.push_back(0.0);\n    return tag;\n  }\n\n  // Stop timing current activity\n  void stop() {\n    if (timer_on_) {\n      timings_[current_activity_] += split_();\n    }\n    timer_on_ = false;\n  };\n\n  // Start timing specified activity\n  void start(TimerInt activity) {\n    if (timer_on_) {\n      timings_[current_activity_] += split_();\n    }\n    else {\n      split_();\n    }\n\n    if (activity >= 0 && activity < size()) {\n      current_activity_ = activity;\n      timer_on_ = true;\n    }\n    else {\n      // Activity out of range - to keep this inline function fast we\n      // don't throw an exception but just don't record the time for\n      // this event\n      timer_on_ = false;\n    }\n  };\n\n  // Set the timing for a specific activity back to zero\n  void reset(TimerInt activity) {\n    if (activity >= 0 && activity < size()) {\n      timings_[activity] = 0.0;\n    }\n  }\n\n  // Return the list of timings in seconds as a constant reference to\n  // a vector of doubles\n  const std::vector<double>& timings() { return timings_; }\n\n  // Return a single timing\n  double timing(TimerInt activity) {\n    if (activity >= 0 && activity < size()) {\n      return timings_[activity];\n    }\n    else {\n      return 0.0;\n    }\n  }\n\n  // Convert from size_t to int\n  TimerInt size() {\n    return timings_.size();\n  }\n\n  // Decide whether the contents of the timer class will be printed\n  // when it is destructed\n  void print_on_exit(bool b = true) {\n    print_on_exit_ = b;\n  }\n\nprivate:\n  // Use Unix system call to get the time accurately\n  double split_() {\n#ifdef _WIN32\n    using namespace std;\n    QueryPerformanceFrequency(&frequency);\n    QueryPerformanceCounter(&win_time_);\n    double dsec = (double) (win_time_.QuadPart - win_last_time_.QuadPart)\n      / (double) frequency.QuadPart;\n    win_last_time_ = win_time_;\n    
return dsec;\n#else\n    struct timeval time;\n    gettimeofday(&time, NULL);\n    double dsec = time.tv_sec - last_time_.tv_sec\n      + 0.000001 * (time.tv_usec - last_time_.tv_usec);\n    last_time_ = time;\n    return dsec;\n#endif\n  }\n  // Data\n  std::vector<double> timings_;\n  std::vector<std::string> names_;\n  TimerInt current_activity_;\n#ifdef _WIN32\n  LARGE_INTEGER frequency;                 // ticks per second\n  LARGE_INTEGER win_time_, win_last_time_; // ticks\n#else\n  timeval last_time_;\n#endif\n  bool timer_on_;\n  bool print_on_exit_;\n};\n\n#endif\n"
  },
  {
    "path": "include/adept/Active.h",
    "content": "/* Active.h -- Active scalar type for automatic differentiation\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015-2018 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n  \n   The Active class describes a scalar variable that can participate\n   in expressions to be differentiated. It is a generalization of the\n   aReal (or adouble) class in Adept 1.0, which was always double\n   precision; Active<T> takes a template argument T that is any\n   floating-point type.\n\n*/\n\n#ifndef AdeptActive_H\n#define AdeptActive_H\n\n#include <iostream>\n#include <vector>\n\n#include <adept/Expression.h>\n#include <adept/exception.h>\n#include <adept/ExpressionSize.h>\n#include <adept/Stack.h>\n\nnamespace adept {\n\n  // ---------------------------------------------------------------------\n  // Definition of Active class\n  // ---------------------------------------------------------------------\n  template <typename Type>\n  class Active : public Expression<Type, Active<Type> > {\n    // CONTENTS\n    // 1. Preamble\n    // 2. Constructors\n    // 3. Operators\n    // 4. Public member functions that don't modify the object\n    // 5. Public member functions that modify the object\n    // 6. Protected member functions\n    // 7. Data\n\n  public:\n    // -------------------------------------------------------------------\n    // 1. 
Preamble\n    // -------------------------------------------------------------------\n\n    // Static definitions to enable the properties of this type of\n    // expression to be discerned at compile time\n    static const bool is_active = true;\n    static const bool is_lvalue = true;\n    static const int  rank      = 0;\n    static const int  n_active  = 1 + internal::is_complex<Type>::value;\n    static const int  n_arrays  = 0;\n    static const int  n_scratch = 0;\n    typedef Type T; // Needed so that ADEPT_INIT_REAL_SNAN works\n\n    // -------------------------------------------------------------------\n    // 2. Constructors\n    // -------------------------------------------------------------------\n\n    // Constructor registers the new Active object with the currently\n    // active stack.  Note that this object is not explicitly\n    // initialized with a particular number; the user should not\n    // assume that it is set to zero but should later assign it to a\n    // particular value. Otherwise in the reverse pass the\n    // corresponding gradient will not be set to zero.\n#ifdef ADEPT_INIT_REAL\n    Active()\n      : val_(ADEPT_INIT_REAL), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { }\n#else\n    Active()\n      : val_(0.0), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { }\n#endif\n\n    // Constructor with a passive argument; this constructor is\n    // invoked with either of the following:\n    //   aReal x = 1.0;\n    //   aReal x(1.0);\n    template <typename PType>\n    Active(const PType& rhs,\n\t   typename internal::enable_if<internal::is_not_expression<PType>::value>::type* dummy = 0)\n      : val_(rhs), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient())\n    {\n      // By pushing this to the statement stack without pushing\n      // anything on to the operation stack we ensure that in the\n      // reverse pass the gradient of this object will be set to zero\n      // after it has been manipulated. 
This is important because the\n      // gradient entry might be reused.\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // Constructor taking an element from an active array: the value\n    // and gradient_index of the element are provided\n    template <typename PType>\n    Active(const PType& rhs, Index gradient_index)\n      : val_(rhs), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient())\n    {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\tADEPT_ACTIVE_STACK->push_rhs(1.0,gradient_index);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n   \n    // Constructor with an active argument\n\n    // Normal copy construction: register the new object then treat\n    // this as an assignment.  We need two versions because if we\n    // don't provide the first then the compiler will provide it and\n    // not use the second if Type==AType\n    Active(const Active<Type>& rhs) \n      : val_(0.0), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient())\n    {\n      *this = rhs;\n    }\n    template <typename AType>\n    Active(const Active<AType>& rhs) \n      : val_(0.0), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient())\n    {\n      *this = rhs;\n    }\n\n    // Construction with an expression.  
This is primarily used so\n    // that if we define a function func(aReal a), it will also accept\n    // active expressions by implicitly converting them to an aReal.\n    template<typename AType, class E>\n    //          explicit\n    Active(const Expression<AType, E>& rhs,\n\t   typename internal::enable_if<E::rank==0\n\t\t\t      && E::is_active>::type* dummy = 0)\n      : gradient_index_(ADEPT_ACTIVE_STACK->register_gradient())\n    {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n      \t// Check there is enough space in the operation stack\n\tADEPT_ACTIVE_STACK->check_space_static<E::n_active>();\n#endif\n\t// Get the value and push the gradients on to the operation\n\t// stack, thereby storing the right-hand-side of the statement\n\tval_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\t// Push the gradient offet of this object on to the statement\n\t// stack, thereby storing the left-hand-side of the statement\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n    }\n\t   \n    // Destructor simply unregisters the object from the stack,\n    // freeing up the gradient index for another\n    ~Active() {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\n\tADEPT_ACTIVE_STACK->unregister_gradient(gradient_index_);\n\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n\n    // -------------------------------------------------------------------\n    // 3. 
Operators\n    // -------------------------------------------------------------------\n\t   \n    // Assignment operator with an inactive variable on the rhs\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value,\n\t\t       Active&>::type\n    operator=(const PType& rhs) {\n      val_ = rhs;\n      // Pushing the gradient index on to the statement stack with no\n      // corresponding operations ensures that the gradient will be\n      // set to zero in the reverse pass when it is finished with\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n      return *this;\n    }\n\n    // Assignment operator with an active variable on the rhs: first a\n    // non-template version because otherwise compiler will generate\n    // its own\n    Active& operator=(const Active& rhs) {\n      // Check there is space in the operation stack for one more\n      // entry\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tADEPT_ACTIVE_STACK->check_space(1);\n#endif\n\t// Same as construction with an expression (defined above)\n\tval_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n      return *this; \n    }\n\n    // Assignment operator with an active variable on the rhs\n    template <class AType>\n    Active& operator=(const Active<AType>& rhs) {\n      // Check there is space in the operation stack for one more\n      // entry\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tADEPT_ACTIVE_STACK->check_space(1);\n#endif\n\t// Same as construction with an expression 
(defined above)\n\tval_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n      return *this;\n    }\n    \n    // Assignment operator with an expression on the rhs: very similar\n    // to construction with an expression (defined above)\n    template <typename AType, class E>\n    typename internal::enable_if<E::is_active && E::rank==0, Active&>::type\n    operator=(const Expression<AType, E>& rhs) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tADEPT_ACTIVE_STACK->check_space_static<E::n_active>();\n#endif\n\tval_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n      return *this;\n    }\n  \n    // All the compound assignment operators are unpacked, i.e. 
a+=b\n    // becomes a=a+b; first for an Expression on the rhs\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, Active&>::type\n    operator+=(const Expression<AType,E>& rhs) {\n      return *this = (*this + rhs);\n    }\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, Active&>::type\n    operator-=(const Expression<AType,E>& rhs) {\n      return *this = (*this - rhs);\n    }\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, Active&>::type\n    operator*=(const Expression<AType,E>& rhs) {\n      return *this = (*this * rhs);\n    }\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, Active&>::type\n    operator/=(const Expression<AType,E>& rhs) {\n      return *this = (*this / rhs);\n    }\n\n    // And likewise for a passive scalar on the rhs\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, Active&>::type\n    operator+=(const PType& rhs) {\n      val_ += rhs;\n      return *this;\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, Active&>::type\n    operator-=(const PType& rhs) {\n      val_ -= rhs;\n      return *this;\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, Active&>::type\n    operator*=(const PType& rhs) {\n      return *this = (*this * rhs);\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, Active&>::type\n    operator/=(const PType& rhs) {\n      return *this = (*this / rhs);\n    }\n\n      \n    // -------------------------------------------------------------------\n    // 4. 
Public member functions that don't modify the object\n    // -------------------------------------------------------------------\n\n    // Get the underlying passive value of this object\n    Type value() const {\n      return val_; \n    }\n\n    // Get the index of the gradient information for this object\n    const Index& gradient_index() const { return gradient_index_; }\n\n    // If an expression leads to calc_gradient being called on an\n    // active object, we push the multiplier and the gradient index on\n    // to the operation stack (or 1.0 if no multiplier is specified\n    template <int Rank>\n    void calc_gradient(Stack& stack, const ExpressionSize<Rank>&) const {\n      stack.push_rhs(1.0, gradient_index_);\n    }\n\n    template <int Rank, typename MyType>\n    void calc_gradient(Stack& stack, const MyType& multiplier, \n\t\t       const ExpressionSize<Rank>&) const {\n      stack.push_rhs(multiplier, gradient_index_);\n    }\n\n    // Set the value of the gradient, for initializing an adjoint;\n    // note that the value of the gradient is not held in the active\n    // object but rather held by the stack\n    template <typename MyType>\n    void set_gradient(const MyType& gradient) const {\n      return ADEPT_ACTIVE_STACK->set_gradients(gradient_index_,\n\t\t\t\t\t       gradient_index_+1, \n\t\t\t\t\t       &gradient);\n    }\n\n    // Get the value of the gradient, for extracting the adjoint after\n    // calling reverse() on the stack\n    template <typename MyType>\n    void get_gradient(MyType& gradient) const {\n      return ADEPT_ACTIVE_STACK->get_gradients(gradient_index_,\n\t\t\t\t\t       gradient_index_+1, &gradient);\n    }\n    Type get_gradient() const {\n      Type gradient = 0;\n      ADEPT_ACTIVE_STACK->get_gradients(gradient_index_,\n\t\t\t\t\tgradient_index_+1, &gradient);\n      return gradient;\n    }\n \n\n    // For modular codes, some modules may have an existing\n    // Jacobian code and possibly be unsuitable for 
automatic\n    // differentiation using Adept (e.g. because they are written in\n    // Fortran).  In this case, we can use the following two functions\n    // to \"wrap\" the non-Adept code.\n\n    // Suppose the non-adept code uses the double values from n aReal\n    // objects pointed to by \"x\" to produce a single double value\n    // \"y_val\" (to be assigned to an aReal object \"y\"), plus a pointer\n    // to an array of forward derivatives \"dy_dx\".  Firstly you should\n    // assign the value using simply \"y = y_val;\", then call\n    // \"y.add_derivative_dependence(x, dy_dx, n);\" to specify how y\n    // depends on x. A fourth argument \"multiplier_stride\" may be used\n    // to stride the indexing to the derivatives, in case they are\n    // part of a matrix that is oriented in a different sense.\n    template <typename MyReal>\n    typename internal::enable_if<internal::is_floating_point<MyReal>::value,\n\t\t       void>::type\n    add_derivative_dependence(const Active* rhs,\n\t\t\t      const MyReal* multiplier,\n\t\t\t      int n, \n\t\t\t      int multiplier_stride = 1) const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tADEPT_ACTIVE_STACK->check_space(n);\n#endif\n\tfor (int i = 0; i < n; i++) {\n\t  Real mult = multiplier[i*multiplier_stride];\n\t  if (mult != 0.0) {\n\t    // For each non-zero multiplier, add a pseudo-operation to\n\t    // the operation stack\n\t    ADEPT_ACTIVE_STACK->push_rhs(mult,\n\t\t\t\t\t rhs[i].gradient_index());\n\t  }\n\t}\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // Suppose the non-Adept code uses double values from n aReal\n    // objects pointed to by \"x\" and m aReal objects pointed to by \"z\"\n    // to produce a single double value, plus pointers to arrays of\n    // forward 
derivatives \"dy_dx\" and \"dy_dz\".  Firstly, as above,\n    // you should assign the value using simply \"y = y_val;\", then\n    // call \"y.add_derivative_dependence(x, dy_dx, n);\" to specify how\n    // y depends on x.  To specify also how y depends on z, call\n    // \"y.append_derivative_dependence(z, dy_dz, n);\".\n    template <typename MyReal>\n    typename internal::enable_if<internal::is_floating_point<MyReal>::value,\n\t\t       void>::type\n    append_derivative_dependence(const Active* rhs,\n\t\t\t\t const MyReal* multiplier,\n\t\t\t\t int n,\n\t\t\t\t int multiplier_stride = 1) const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tADEPT_ACTIVE_STACK->check_space(n);\n#endif\n\tfor (int i = 0; i < n; ++i) {\n\t  Real mult = multiplier[i*multiplier_stride];\n\t  if (mult != 0.0) {\n\t    // For each non-zero multiplier, add a pseudo-operation to\n\t    // the operation stack\n\t    ADEPT_ACTIVE_STACK->push_rhs(mult,\n\t\t\t\t\t rhs[i].gradient_index());\n\t  }\n\t}\n\tif (!(ADEPT_ACTIVE_STACK->update_lhs(gradient_index_))) {\n\t  throw wrong_gradient(\"Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call\"\n\t\t\t       ADEPT_EXCEPTION_LOCATION);\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // For only one independent variable on the rhs, these two\n    // functions are convenient as they don't involve pointers\n    template <class T>\n    void add_derivative_dependence(const T& rhs, Real multiplier) const {\n      ADEPT_ACTIVE_STACK->add_derivative_dependence(gradient_index_,\n\t\t\t\t\t\t    rhs.gradient_index(),\n\t\t\t\t\t\t    multiplier);\n    }\n    template <class T>\n    void append_derivative_dependence(const T& rhs, Real multiplier) const {\n      
ADEPT_ACTIVE_STACK->append_derivative_dependence(gradient_index_,\n\t\t\t\t\t\t       rhs.gradient_index(),\n\t\t\t\t\t\t       multiplier);\n    }\n \n    // -------------------------------------------------------------------\n    // 4.1. Public member functions used by other expressions\n    // -------------------------------------------------------------------\n    bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }\n\n    std::string expression_string_() const {\n      std::stringstream s;\n      s << \"Active(\" << val_ << \")\";\n      return s.str();\n    }\n\n    bool is_aliased_(const Type* mem1, const Type* mem2) const { \n      return false;\n    }\n\n    Type value_with_len_(const Index& j, const Index& len) const\n    { return val_; }\n\n    template <int MyArrayNum, int NArrays>\n    void advance_location_(ExpressionSize<NArrays>& loc) const { } \n\n    template <int MyArrayNum, int NArrays>\n    Type value_at_location_(const ExpressionSize<NArrays>& loc) const\n    { return val_; }\n    \n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\tinternal::ScratchVector<NScratch>& scratch) const\n    { return val_; }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t     const internal::ScratchVector<NScratch>& scratch) const\n    { return val_; }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    void calc_gradient_(Stack& stack, \n\t\t\tconst ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch) const {\n      stack.push_rhs(1.0, gradient_index_);\n    }\n\n    template <int MyArrayNum, int MyScratchNum, \n\t      int NArrays, int NScratch, typename MyType>\n    void calc_gradient_(Stack& stack, \n\t\t\tconst ExpressionSize<NArrays>& loc,\n\t\t\tconst 
internal::ScratchVector<NScratch>& scratch,\n\t\t\tconst MyType& multiplier) const {\n      stack.push_rhs(multiplier, gradient_index_);\n    }\n\n    template <int MyArrayNum, int Rank, int NArrays>\n    void set_location_(const ExpressionSize<Rank>& i, \n\t\t       ExpressionSize<NArrays>& index) const {}\n\n\n    // The Stack::independent(x) and Stack::dependent(y) functions add\n    // the gradient_index of objects x and y to std::vector<uIndex>\n    // objects in Stack. Since x and y may be scalars or arrays, this\n    // is best done by delegating to the Active or Array classes.\n    template <typename IndexType>\n    void push_gradient_indices(std::vector<IndexType>& vec) const {\n      vec.push_back(gradient_index_);\n    }\n\n    // -------------------------------------------------------------------\n    // 5. Public member functions that modify the object\n    // -------------------------------------------------------------------\n\n    // Set the value \n    template <typename MyType>\n    void set_value(const MyType& x) { val_ = x; }\n\n    // For use in creating active references, to get a non-const\n    // reference to the underlying passive data\n    Type& lvalue() { return val_; }\n\n    \n    // -------------------------------------------------------------------\n    // 6. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n    \n    // -------------------------------------------------------------------\n    // 7. 
Data\n    // -------------------------------------------------------------------\n  private:\n    Type val_;                     // The numerical value\n    Index gradient_index_;         // Index to where the corresponding\n\t\t\t\t   // gradient will be held during the\n\t\t\t\t   // adjoint calculation\n  }; // End of definition of Active\n\n\n  // ---------------------------------------------------------------------\n  // Helper function for Active class\n  // ---------------------------------------------------------------------\n\n  // A way of setting the initial values of an array of n aReal\n  // objects without the expense of placing them on the stack\n  template<typename Type>\n  inline\n  void set_values(Active<Type>* a, Index n, const Type* data)\n  {\n    for (Index i = 0; i < n; i++) {\n      a[i].set_value(data[i]);\n    }\n  }\n\n  // Extract the values of an array of n aReal objects\n  template<typename Type>\n  inline\n  void get_values(const Active<Type>* a, Index n, Type* data)\n  {\n    for (Index i = 0; i < n; i++) {\n      data[i] = a[i].value();\n    }\n  }\n  \n  // Set the initial gradients of an array of n aReal objects; this\n  // should be done after the algorithm has called and before the\n  // Stack::forward or Stack::reverse functions are called\n  template<typename Type>\n  inline\n  void set_gradients(Active<Type>* a, Index n, const Type* data)\n  {\n    for (Index i = 0; i < n; i++) {\n      a[i].set_gradient(data[i]);\n    }\n  }\n  \n  // Extract the gradients from an array of aReal objects after the\n  // Stack::forward or Stack::reverse functions have been called\n  template<typename Type>\n  inline\n  void get_gradients(const Active<Type>* a, Index n, Type* data)\n  {\n    for (Index i = 0; i < n; i++) {\n      a[i].get_gradient(data[i]);\n    }\n  }\n\n  // Print an active scalar to a stream\n  template<typename Type>\n  inline\n  std::ostream&\n  operator<<(std::ostream& os, const Active<Type>& v)\n  {\n    os << 
v.value();\n    return os;\n  }\n\n  // Print an active scalar expression to a stream\n  template <typename Type, class E>\n  inline\n  typename internal::enable_if<E::rank == 0 && E::is_active, std::ostream&>::type\n  operator<<(std::ostream& os, const Expression<Type,E>& expr) {\n    os << expr.scalar_value();\n    return os;\n  }\n\n  namespace internal {\n    // ---------------------------------------------------------------------\n    // Definition of active_scalar\n    // ---------------------------------------------------------------------\n    \n    // Return the active scalar version of Type if it is active,\n    // otherwise just return Type\n    \n    template <class Type, bool IsActive> struct active_scalar {\n      typedef Type type;\n    };\n\n    template <class Type> struct active_scalar<Type, true> {\n      typedef Active<Type> type;\n    };\n\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/ActiveConstReference.h",
    "content": "/* ActiveConstReference.h -- Const reference to an active element of an array\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   Provide an active scalar type where the data is actually a\n   reference to an element of array. This enables an active array to\n   be indexed such that the returned value can be used as an r-value\n   and participate in expressions to be differentiated.\n\n*/\n\n#ifndef AdeptActiveConstReference_H\n#define AdeptActiveConstReference_H\n\n#include <iostream>\n#include <vector>\n\n#include <adept/Active.h>\n\nnamespace adept {\n\n  // ---------------------------------------------------------------------\n  // Definition of ActiveReference class\n  // ---------------------------------------------------------------------\n  template <typename Type>\n  class ActiveConstReference : public Expression<Type, ActiveConstReference<Type> > {\n    // CONTENTS\n    // 1. Preamble\n    // 2. Constructors\n    // 3. Operators\n    // 4. Public member functions that don't modify the object\n    // 5. Public member functions that modify the object\n    // 6. Protected member functions\n    // 7. Data\n\n  public:\n    // -------------------------------------------------------------------\n    // 1. Preamble\n    // -------------------------------------------------------------------\n\n    // Static definitions to enable the properties of this type of\n    // expression to be discerned at compile time\n    static const bool is_active = true;\n    static const int  rank      = 0;\n    static const int  n_active  = 1 + internal::is_complex<Type>::value;\n    static const int  n_arrays  = 0;\n    static const int  n_scratch = 0;\n\n    // -------------------------------------------------------------------\n    // 2. 
Constructors\n    // -------------------------------------------------------------------\n\n  private:\n    // There is only one way to construct an ActiveConstReference, so all\n    // others that would otherwise be generated by the compiler are\n    // made inaccessible\n    ActiveConstReference() { }\n\n  public:\n\n    ActiveConstReference(const ActiveConstReference& rhs)\n      : val_(rhs.value()), gradient_index_(rhs.gradient_index()) { }\n    \n    // In order to initialize this object, we pass in the gradient\n    // index from the location in the array as the first argument.\n    ActiveConstReference(const Type& val, Index gradient_index)\n      : val_(val), gradient_index_(gradient_index) { \n    }\n\n    /*\n    ActiveConstReference(const ActiveConstReference& rhs)\n      : val_(const_cast<ActiveConstReference<Type>&>(rhs).lvalue()),\n\tgradient_index_(rhs.gradient_index()) { }\n    */\n\n    // Destructor does not unregister the object from the stack since\n    // it is not the only reference to it.\n    ~ActiveConstReference() { }\n\n\n    // -------------------------------------------------------------------\n    // 3. Operators\n    // -------------------------------------------------------------------\n\t   \n    // Assignment operator with an active variable on the rhs: first a\n    // non-template version because otherwise compiler will generate\n    // its own; must be inaccessible\n  private:\n    ActiveConstReference& operator=(const ActiveConstReference& rhs) { }\n\n  public:\n    // -------------------------------------------------------------------\n    // 4. 
Public member functions that don't modify the object\n    // -------------------------------------------------------------------\n\n    // Get the underlying passive value of this object\n    const Type& value() const {\n      return val_; \n    }\n\n    // Get the index of the gradient information for this object\n    const Index& gradient_index() const { return gradient_index_; }\n\n    // If an expression leads to calc_gradient being called on an\n    // active object, we push the multiplier and the gradient index on\n    // to the operation stack (or 1.0 if no multiplier is specified)\n    template <int Rank>\n    void calc_gradient(Stack& stack, const ExpressionSize<Rank>&) const {\n      stack.push_rhs(1.0, gradient_index_);\n    }\n\n    template <int Rank, typename MyType>\n    void calc_gradient(Stack& stack, const MyType& multiplier, \n\t\t       const ExpressionSize<Rank>&) const {\n      stack.push_rhs(multiplier, gradient_index_);\n    }\n\n    // Set the value of the gradient, for initializing an adjoint;\n    // note that the value of the gradient is not held in the active\n    // object but rather held by the stack\n    template <typename MyType>\n    void set_gradient(const MyType& gradient) const {\n      return ADEPT_ACTIVE_STACK->set_gradients(gradient_index_,\n\t\t\t\t\t       gradient_index_+1, \n\t\t\t\t\t       &gradient);\n    }\n\n    // Get the value of the gradient, for extracting the adjoint after\n    // calling reverse() on the stack\n    template <typename MyType>\n    void get_gradient(MyType& gradient) const {\n      return ADEPT_ACTIVE_STACK->get_gradients(gradient_index_,\n\t\t\t\t\t       gradient_index_+1, &gradient);\n    }\n    Type get_gradient() const {\n      Type gradient = 0;\n      ADEPT_ACTIVE_STACK->get_gradients(gradient_index_,\n\t\t\t\t\tgradient_index_+1, &gradient);\n      return gradient;\n    }\n \n\n    // For modular codes, some modules may have an existing\n    // Jacobian code and possibly be unsuitable for 
automatic\n    // differentiation using Adept (e.g. because they are written in\n    // Fortran).  In this case, we can use the following two functions\n    // to \"wrap\" the non-Adept code.\n\n    // Suppose the non-adept code uses the double values from n aReal\n    // objects pointed to by \"x\" to produce a single double value\n    // \"y_val\" (to be assigned to an aReal object \"y\"), plus a pointer\n    // to an array of forward derivatives \"dy_dx\".  Firstly you should\n    // assign the value using simply \"y = y_val;\", then call\n    // \"y.add_derivative_dependence(x, dy_dx, n);\" to specify how y\n    // depends on x. A fourth argument \"multiplier_stride\" may be used\n    // to stride the indexing to the derivatives, in case they are\n    // part of a matrix that is oriented in a different sense.\n    void add_derivative_dependence(const Active<Type>* rhs,\n\t\t\t\t   const Real* multiplier,\n\t\t\t\t   int n, \n\t\t\t\t   int multiplier_stride = 1) const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tADEPT_ACTIVE_STACK->check_space(n);\n#endif\n\tfor (int i = 0; i < n; i++) {\n\t  Real mult = multiplier[i*multiplier_stride];\n\t  if (mult != 0.0) {\n\t    // For each non-zero multiplier, add a pseudo-operation to\n\t    // the operation stack\n\t    ADEPT_ACTIVE_STACK->push_rhs(mult,\n\t\t\t\t\t rhs[i].gradient_index());\n\t  }\n\t}\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // Suppose the non-Adept code uses double values from n aReal\n    // objects pointed to by \"x\" and m aReal objects pointed to by \"z\"\n    // to produce a single double value, plus pointers to arrays of\n    // forward derivatives \"dy_dx\" and \"dy_dz\".  
Firstly, as above,\n    // you should assign the value using simply \"y = y_val;\", then\n    // call \"y.add_derivative_dependence(x, dy_dx, n);\" to specify how\n    // y depends on x.  To specify also how y depends on z, call\n    // \"y.append_derivative_dependence(z, dy_dz, n);\".\n    void append_derivative_dependence(const Active<Type>* rhs,\n\t\t\t\t      const Real* multiplier,\n\t\t\t\t      int n,\n\t\t\t\t      int multiplier_stride = 1) const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tADEPT_ACTIVE_STACK->check_space(n);\n#endif\n\tfor (int i = 0; i < n; i++) {\n\t  Real mult = multiplier[i*multiplier_stride];\n\t  if (mult != 0.0) {\n\t    // For each non-zero multiplier, add a pseudo-operation to\n\t    // the operation stack\n\t    ADEPT_ACTIVE_STACK->push_rhs(mult,\n\t\t\t\t\t rhs[i].gradient_index());\n\t  }\n\t}\n\tif (!(ADEPT_ACTIVE_STACK->update_lhs(gradient_index_))) {\n\t  throw wrong_gradient(\"Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call\"\n\t\t\t       ADEPT_EXCEPTION_LOCATION);\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n    // For only one independent variable on the rhs, these two\n    // functions are convenient as they don't involve pointers\n    template <class T>\n    void add_derivative_dependence(T& rhs, Real multiplier) const {\n      ADEPT_ACTIVE_STACK->add_derivative_dependence(gradient_index_,\n\t\t\t\t\t\t    rhs.gradient_index(),\n\t\t\t\t\t\t    multiplier);\n    }\n    template <class T>\n    void append_derivative_dependence(T& rhs, Real multiplier) const {\n      ADEPT_ACTIVE_STACK->append_derivative_dependence(gradient_index_,\n\t\t\t\t\t\t       rhs.gradient_index(),\n\t\t\t\t\t\t       multiplier);\n    }\n \n    // 
-------------------------------------------------------------------\n    // 4.1. Public member functions used by other expressions\n    // -------------------------------------------------------------------\n    bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }\n\n    std::string expression_string_() const {\n      std::stringstream s;\n      s << \"ActiveConstReference(\" << val_ << \")\";\n      return s.str();\n    }\n\n    bool is_aliased_(const Type* mem1, const Type* mem2) const { \n      return &val_ >= mem1 && &val_ <= mem2; \n    }\n\n    Type value_with_len_(const Index& j, const Index& len) const\n    { return val_; }\n\n    template <int MyArrayNum, int NArrays>\n    void advance_location_(ExpressionSize<NArrays>& loc) const { } \n\n    template <int MyArrayNum, int NArrays>\n    Type value_at_location_(const ExpressionSize<NArrays>& loc) const\n    { return val_; }\n    \n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\tinternal::ScratchVector<NScratch>& scratch) const\n    { return val_; }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t     const internal::ScratchVector<NScratch>& scratch) const\n    { return val_; }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    void calc_gradient_(Stack& stack, \n\t\t\tconst ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch) const {\n      stack.push_rhs(1.0, gradient_index_);\n    }\n\n    template <int MyArrayNum, int MyScratchNum, \n\t      int NArrays, int NScratch, typename MyType>\n    void calc_gradient_(Stack& stack, \n\t\t\tconst ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch,\n\t\t\tconst MyType& multiplier) const {\n      stack.push_rhs(multiplier, gradient_index_);\n    }\n\n    
template <int MyArrayNum, int Rank, int NArrays>\n    void set_location_(const ExpressionSize<Rank>& i, \n\t\t       ExpressionSize<NArrays>& index) const {}\n\n\n    // The Stack::independent(x) and Stack::dependent(y) functions add\n    // the gradient_index of objects x and y to std::vector<uIndex>\n    // objects in Stack. Since x and y may be scalars or arrays, this\n    // is best done by delegating to the ActiveConstReference or Array classes.\n    template <typename IndexType>\n    void push_gradient_indices(std::vector<IndexType>& vec) const {\n      vec.push_back(gradient_index_);\n    }\n\n    // -------------------------------------------------------------------\n    // 5. Public member functions that modify the object\n    // -------------------------------------------------------------------\n\n    // Set the value \n    template <typename MyType>\n    void set_value(const MyType& x) { val_ = x; }\n    \n    // -------------------------------------------------------------------\n    // 6. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n    \n    // -------------------------------------------------------------------\n    // 7. 
Data\n    // -------------------------------------------------------------------\n  private:\n    const Type& val_;              // Reference to the numerical value\n    Index gradient_index_;         // Index to where the corresponding\n\t\t\t\t   // gradient will be held during the\n\t\t\t\t   // adjoint calculation\n  }; // End of definition of ActiveConstReference\n\n\n  // ---------------------------------------------------------------------\n  // Helper function for ActiveConstReference class\n  // ---------------------------------------------------------------------\n\n  template<typename Type>\n  inline\n  std::ostream&\n  operator<<(std::ostream& os, const ActiveConstReference<Type>& v)\n  {\n    os << v.value();\n    return os;\n  }\n\n\n  namespace internal {\n    \n    // ---------------------------------------------------------------------\n    // active_const_reference\n    // ---------------------------------------------------------------------\n\n    // Return the active reference version of Type if it is active,\n    // otherwise just return Type&\n\n    template <class Type, bool IsActive> struct active_const_reference {\n      typedef const Type& type;\n    };\n    template <class Type> struct active_const_reference<Type, true> {\n      typedef ActiveConstReference<Type> type;\n    };\n  }\n\n\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/ActiveReference.h",
    "content": "/* ActiveReference.h -- Reference to an active element of an array\n\n    Copyright (C) 2015-2018 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   Provide an active scalar type where the data is actually a\n   reference to an element of array. This enables an active array to\n   be indexed such that the returned value can be used as an l-value\n   and participate in expressions to be differentiated.\n\n*/\n\n#ifndef AdeptActiveReference_H\n#define AdeptActiveReference_H\n\n#include <iostream>\n#include <vector>\n\n#include <adept/Active.h>\n\nnamespace adept {\n\n  // ---------------------------------------------------------------------\n  // Definition of ActiveReference class\n  // ---------------------------------------------------------------------\n  template <typename Type>\n  class ActiveReference : public Expression<Type, ActiveReference<Type> > {\n    // CONTENTS\n    // 1. Preamble\n    // 2. Constructors\n    // 3. Operators\n    // 4. Public member functions that don't modify the object\n    // 5. Public member functions that modify the object\n    // 6. Protected member functions\n    // 7. Data\n\n  public:\n    // -------------------------------------------------------------------\n    // 1. Preamble\n    // -------------------------------------------------------------------\n\n    // Static definitions to enable the properties of this type of\n    // expression to be discerned at compile time\n    static const bool is_active = true;\n    static const int  rank      = 0;\n    static const int  n_active  = 1 + internal::is_complex<Type>::value;\n    static const int  n_arrays  = 0;\n    static const int  n_scratch = 0;\n\n    // -------------------------------------------------------------------\n    // 2. 
Constructors\n    // -------------------------------------------------------------------\n\n  private:\n    // There is only one way to construct an ActiveReference, so all\n    // others that would otherwise be generated by the compiler are\n    // made inaccessible\n    ActiveReference() { }\n\n    ActiveReference(ActiveReference& rhs)\n      : val_(rhs.lvalue()), gradient_index_(rhs.gradient_index()) { }\n\n  public:\n    \n    // In order to initialize this object, we pass in the gradient\n    // index from the location in the array as the first argument.\n    ActiveReference(Type& val, Index gradient_index)\n      : val_(val), gradient_index_(gradient_index) { \n    }\n\n    // \n    ActiveReference(const ActiveReference& rhs)\n      : val_(const_cast<ActiveReference<Type>&>(rhs).lvalue()),\n\tgradient_index_(rhs.gradient_index()) { }\n\n    // Destructor does not unregister the object from the stack since\n    // it is not the only reference to it.\n    ~ActiveReference() { }\n\n\n    // -------------------------------------------------------------------\n    // 3. 
Operators\n    // -------------------------------------------------------------------\n\t   \n    // Assignment operator with an inactive variable on the rhs\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value,\n\t\t       ActiveReference&>::type\n    operator=(const PType& rhs) {\n      val_ = rhs;\n      // Pushing the gradient index on to the statement stack with no\n      // corresponding operations ensures that the gradient will be\n      // set to zero in the reverse pass when it is finished with\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n      return *this;\n    }\n\n    // Assignment operator with an active variable on the rhs: first a\n    // non-template version because otherwise compiler will generate\n    // its own\n    ActiveReference& operator=(const ActiveReference& rhs) {\n      // Check there is space in the operation stack for one more\n      // entry\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tADEPT_ACTIVE_STACK->check_space(1);\n#endif\n\tval_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n      return *this; \n    }\n\n    // Assignment operator with an active variable on the rhs\n    template <class AType>\n    ActiveReference& operator=(const Active<AType>& rhs) {\n      // Check there is space in the operation stack for one more\n      // entry\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tADEPT_ACTIVE_STACK->check_space(1);\n#endif\n\t// Same as construction with an expression (defined above)\n\tval_ = 
rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n      return *this;\n    }\n    \n    // Assignment operator with an expression on the rhs\n    template <typename AType, class E>\n    typename internal::enable_if<E::is_active && E::rank==0, ActiveReference&>::type\n    operator=(const Expression<AType, E>& rhs) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tADEPT_ACTIVE_STACK->check_space_static<E::n_active>();\n#endif\n\tval_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\tval_ = rhs.scalar_value();\n      }\n#endif\n      return *this;\n    }\n  \n    // All the compound assignment operators are unpacked, i.e. a+=b\n    // becomes a=a+b; first for an Expression on the rhs\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, ActiveReference&>::type\n    operator+=(const Expression<AType,E>& rhs) {\n      return *this = (*this + rhs);\n    }\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, ActiveReference&>::type\n    operator-=(const Expression<AType,E>& rhs) {\n      return *this = (*this - rhs);\n    }\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, ActiveReference&>::type\n    operator*=(const Expression<AType,E>& rhs) {\n      return *this = (*this * rhs);\n    }\n    template<typename AType, class E>\n    typename internal::enable_if<E::rank==0, ActiveReference&>::type\n    operator/=(const Expression<AType,E>& rhs) {\n      return *this = (*this / rhs);\n    }\n\n    // And likewise for a passive scalar on the rhs\n    template <typename PType>\n    typename 
internal::enable_if<internal::is_not_expression<PType>::value, ActiveReference&>::type\n    operator+=(const PType& rhs) {\n      val_ += rhs;\n      return *this;\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, ActiveReference&>::type\n    operator-=(const PType& rhs) {\n      val_ -= rhs;\n      return *this;\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, ActiveReference&>::type\n    operator*=(const PType& rhs) {\n      return *this = (*this * rhs);\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, ActiveReference&>::type\n    operator/=(const PType& rhs) {\n      return *this = (*this / rhs);\n    }\n\n      \n    // -------------------------------------------------------------------\n    // 4. Public member functions that don't modify the object\n    // -------------------------------------------------------------------\n\n    // Get the underlying passive value of this object\n    Type value() const {\n      return val_; \n    }\n\n    // Get the index of the gradient information for this object\n    const Index& gradient_index() const { return gradient_index_; }\n\n    // If an expression leads to calc_gradient being called on an\n    // active object, we push the multiplier and the gradient index on\n    // to the operation stack (or 1.0 if no multiplier is specified)\n    template <int Rank>\n    void calc_gradient(Stack& stack, const ExpressionSize<Rank>&) const {\n      stack.push_rhs(1.0, gradient_index_);\n    }\n\n    template <int Rank, typename MyType>\n    void calc_gradient(Stack& stack, const MyType& multiplier, \n\t\t       const ExpressionSize<Rank>&) const {\n      stack.push_rhs(multiplier, gradient_index_);\n    }\n\n    // Set the value of the gradient, for initializing an adjoint;\n    // note that the value of the gradient is not held in the 
active\n    // object but rather held by the stack\n    template <typename MyType>\n    void set_gradient(const MyType& gradient) const {\n      return ADEPT_ACTIVE_STACK->set_gradients(gradient_index_,\n\t\t\t\t\t       gradient_index_+1, \n\t\t\t\t\t       &gradient);\n    }\n\n    // Get the value of the gradient, for extracting the adjoint after\n    // calling reverse() on the stack\n    template <typename MyType>\n    void get_gradient(MyType& gradient) const {\n      return ADEPT_ACTIVE_STACK->get_gradients(gradient_index_,\n\t\t\t\t\t       gradient_index_+1, &gradient);\n    }\n    Type get_gradient() const {\n      Type gradient = 0;\n      ADEPT_ACTIVE_STACK->get_gradients(gradient_index_,\n\t\t\t\t\tgradient_index_+1, &gradient);\n      return gradient;\n    }\n \n\n    // For modular codes, some modules may have an existing\n    // Jacobian code and possibly be unsuitable for automatic\n    // differentiation using Adept (e.g. because they are written in\n    // Fortran).  In this case, we can use the following two functions\n    // to \"wrap\" the non-Adept code.\n\n    // Suppose the non-adept code uses the double values from n aReal\n    // objects pointed to by \"x\" to produce a single double value\n    // \"y_val\" (to be assigned to an aReal object \"y\"), plus a pointer\n    // to an array of forward derivatives \"dy_dx\".  Firstly you should\n    // assign the value using simply \"y = y_val;\", then call\n    // \"y.add_derivative_dependence(x, dy_dx, n);\" to specify how y\n    // depends on x. 
A fourth argument \"multiplier_stride\" may be used\n    // to stride the indexing to the derivatives, in case they are\n    // part of a matrix that is oriented in a different sense.\n    void add_derivative_dependence(const Active<Type>* rhs,\n\t\t\t\t   const Real* multiplier,\n\t\t\t\t   int n, \n\t\t\t\t   int multiplier_stride = 1) const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tADEPT_ACTIVE_STACK->check_space(n);\n#endif\n\tfor (int i = 0; i < n; i++) {\n\t  Real mult = multiplier[i*multiplier_stride];\n\t  if (mult != 0.0) {\n\t    // For each non-zero multiplier, add a pseudo-operation to\n\t    // the operation stack\n\t    ADEPT_ACTIVE_STACK->push_rhs(mult,\n\t\t\t\t\t rhs[i].gradient_index());\n\t  }\n\t}\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index_);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // Suppose the non-Adept code uses double values from n aReal\n    // objects pointed to by \"x\" and m aReal objects pointed to by \"z\"\n    // to produce a single double value, plus pointers to arrays of\n    // forward derivatives \"dy_dx\" and \"dy_dz\".  Firstly, as above,\n    // you should assign the value using simply \"y = y_val;\", then\n    // call \"y.add_derivative_dependence(x, dy_dx, n);\" to specify how\n    // y depends on x.  
To specify also how y depends on z, call\n    // \"y.append_derivative_dependence(z, dy_dz, n);\".\n    template <typename T>\n    void append_derivative_dependence(const Active<Type>* rhs,\n\t\t\t\t      const Real* multiplier,\n\t\t\t\t      int n,\n\t\t\t\t      int multiplier_stride = 1) const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tADEPT_ACTIVE_STACK->check_space(n);\n#endif\n\tfor (int i = 0; i < n; i ++) {\n\t  Real mult = multiplier[i*multiplier_stride];\n\t  if (mult != 0.0) {\n\t    // For each non-zero multiplier, add a pseudo-operation to\n\t    // the operation stack\n\t    ADEPT_ACTIVE_STACK->push_rhs(mult,\n\t\t\t\t\t rhs[i].gradient_index());\n\t  }\n\t}\n\tif (!(ADEPT_ACTIVE_STACK->update_lhs(gradient_index_))) {\n\t  throw wrong_gradient(\"Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call\"\n\t\t\t       ADEPT_EXCEPTION_LOCATION);\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // For only one independent variable on the rhs, these two\n    // functions are convenient as they don't involve pointers\n    template <class T>\n    void add_derivative_dependence(T& rhs, Real multiplier) const {\n      ADEPT_ACTIVE_STACK->add_derivative_dependence(gradient_index_,\n\t\t\t\t\t\t    rhs.gradient_index(),\n\t\t\t\t\t\t    multiplier);\n    }\n    template <class T>\n    void append_derivative_dependence(T& rhs, Real multiplier) const {\n      ADEPT_ACTIVE_STACK->append_derivative_dependence(gradient_index_,\n\t\t\t\t\t\t       rhs.gradient_index(),\n\t\t\t\t\t\t       multiplier);\n    }\n     \n \n    // -------------------------------------------------------------------\n    // 4.1. 
Public member functions used by other expressions\n    // -------------------------------------------------------------------\n    bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }\n\n    std::string expression_string_() const {\n      std::stringstream s;\n      s << \"ActiveReference(\" << val_ << \")\";\n      return s.str();\n    }\n\n    bool is_aliased_(const Type* mem1, const Type* mem2) const { \n      return &val_ >= mem1 && &val_ <= mem2; \n    }\n\n    Type value_with_len_(const Index& j, const Index& len) const\n    { return val_; }\n\n    template <int MyArrayNum, int NArrays>\n    void advance_location_(ExpressionSize<NArrays>& loc) const { } \n\n    template <int MyArrayNum, int NArrays>\n    Type value_at_location_(const ExpressionSize<NArrays>& loc) const\n    { return val_; }\n    \n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\tinternal::ScratchVector<NScratch>& scratch) const\n    { return val_; }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t     const internal::ScratchVector<NScratch>& scratch) const\n    { return val_; }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    void calc_gradient_(Stack& stack, \n\t\t\tconst ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch) const {\n      stack.push_rhs(1.0, gradient_index_);\n    }\n\n    template <int MyArrayNum, int MyScratchNum, \n\t      int NArrays, int NScratch, typename MyType>\n    void calc_gradient_(Stack& stack, \n\t\t\tconst ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch,\n\t\t\tconst MyType& multiplier) const {\n      stack.push_rhs(multiplier, gradient_index_);\n    }\n\n    template <int MyArrayNum, int Rank, int NArrays>\n    void set_location_(const 
ExpressionSize<Rank>& i, \n\t\t       ExpressionSize<NArrays>& index) const {}\n\n\n    // The Stack::independent(x) and Stack::dependent(y) functions add\n    // the gradient_index of objects x and y to std::vector<uIndex>\n    // objects in Stack. Since x and y may be scalars or arrays, this\n    // is best done by delegating to the ActiveReference or Array classes.\n    template <typename IndexType>\n    void push_gradient_indices(std::vector<IndexType>& vec) const {\n      vec.push_back(gradient_index_);\n    }\n\n    // -------------------------------------------------------------------\n    // 5. Public member functions that modify the object\n    // -------------------------------------------------------------------\n\n    // Set the value \n    template <typename MyType>\n    void set_value(const MyType& x) { val_ = x; }\n    \n    // -------------------------------------------------------------------\n    // 6. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n    \n    // For use in creating active references, to get a non-const\n    // reference to the underlying passive data\n    Type& lvalue() { return val_; }\n\n    // -------------------------------------------------------------------\n    // 7. 
Data\n    // -------------------------------------------------------------------\n  private:\n    Type& val_;                    // Reference to the numerical value\n    Index gradient_index_;         // Index to where the corresponding\n\t\t\t\t   // gradient will be held during the\n\t\t\t\t   // adjoint calculation\n  }; // End of definition of ActiveReference\n\n\n  // ---------------------------------------------------------------------\n  // Helper function for ActiveReference class\n  // ---------------------------------------------------------------------\n\n  template<typename Type>\n  inline\n  std::ostream&\n  operator<<(std::ostream& os, const ActiveReference<Type>& v)\n  {\n    os << v.value();\n    return os;\n  }\n\n\n  namespace internal {\n    \n    // ---------------------------------------------------------------------\n    // active_reference\n    // ---------------------------------------------------------------------\n\n    // Return the active reference version of Type if it is active,\n    // otherwise just return Type&\n\n    template <class Type, bool IsActive> struct active_reference {\n      typedef Type& type;\n    };\n    template <class Type> struct active_reference<Type, true> {\n      typedef ActiveReference<Type> type;\n    };\n  }\n\n\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/Allocator.h",
    "content": "/* Allocator.h -- Allocates elements to arrays\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptAllocator_H\n#define AdeptAllocator_H 1\n\n#include <adept/Array.h>\n//#include <adept/SpecialMatrix.h>\n\nnamespace adept {\n  namespace internal {\n   \n    template <int Rank, class A>\n    class Allocator {\n    public:\n      // Create an allocator object and copy the first object in it\n      template <typename F>\n      Allocator(A& array, const F& first_arg) \n\t: array_(array), size_(array.dimensions()),\n\t  //\t  filled_size_(0), \n\t  obj_size_(0), coords_(0),\n\t  scalar_size_(1) {\n\t*this << first_arg;\n      }\n\n      // Copy a scalar into the array\n      template <typename T>\n      typename enable_if<is_not_expression<T>::value,Allocator&>::type\n      operator<<(const T& x) {\n\tif (coords_[Rank-1] >= size_[Rank-1]) {\n\t  // We have reached the end of the array: move to next row\n\t  complete_row<Rank>();\n\t  // All dimensions of this object are of length 1\n\t  obj_size_.set_all(1);\n\t}\n\telse if (coords_[Rank-1] == 0) {\n\t  // At the beginning of a row: set the size of the template\n\t  // object to that of a scalar\n\t  obj_size_ = scalar_size_;\t  \n\t}\n\telse if (obj_size_ != scalar_size_) {\n\t  // The template object size is not the same as a scalar,\n\t  // indicating that dissimilar objects have been concatenated\n\t  // in a row\n\t  throw index_out_of_bounds(\"Scalar added to array with \\\"<<\\\" when previous objects on row were not scalar\" \n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\t// Add the scalar to the array and increment the final index\n\tarray_.get_lvalue(coords_) = x;\n\t++coords_[Rank-1];\n\treturn *this;\n      }\n\n\n      // Copy an expression into the array\n      template <typename T, class E>\n      typename enable_if<(E::rank <= Rank), 
Allocator&>::type\n      operator<<(const Expression<T,E>& x) {\n\t// Evaluate expression and store in an Array of the same rank\n\t// (if Expression is already an Array then this will make a\n\t// shallow copy). Ought to check for aliasing.\n\tconst Array<E::rank,T,E::is_active> xx(x.cast());\n\tExpressionSize<Rank-1> leading_dim;\n\t//\tleading_dim.copy_dissimilar(xx.dimensions());\n\tpartial_copy(xx.dimensions(), leading_dim);\n\n\tif (coords_[Rank-1] >= size_[Rank-1]) {\n\t  // We have reached the end of the array: move to next row\n\t  complete_row<Rank>();\n\t}\n\tif (coords_[Rank-1] == 0) {\n\t  partial_copy(xx.dimensions(), obj_size_);\n\t}\n\telse if (obj_size_ != leading_dim) {\n\t  // The template object size is not the same as the current\n\t  // array, indicating that dissimilar objects have been\n\t  // concatenated in a row\n\t  throw index_out_of_bounds(\"Expression added to array with \\\"<<\\\" does not match size of previous objects on row\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\t// Add the object to the array and increment the final index\n\tExpressionSize<Rank> i_lhs(coords_);\n\tExpressionSize<E::rank> i_rhs(0);\n\tint rank;\n\tdo {\n\t  array_.get_lvalue(i_lhs) = xx.get_rvalue(i_rhs);\n\t  advance_index(rank, i_lhs, i_rhs, xx.dimensions());\n\t}\n\twhile (rank >= 0);\n\t\n\tcoords_[Rank-1] += xx.dimension(E::rank-1);\n\treturn *this;\n      }\n\n      template <int RhsRank>\n      void advance_index(int& rank, ExpressionSize<Rank>& i_lhs, \n\t\t\t ExpressionSize<RhsRank>& i_rhs,\n\t\t\t const ExpressionSize<RhsRank>& size) const {\n\trank = RhsRank;\n\twhile (--rank >= 0) {\n\t  if (++i_rhs[rank] >= size[rank]) {\n\t    i_rhs[rank] = 0;\n\t    i_lhs[rank+(Rank-RhsRank)] -= (size[rank]-1);\n\t    }\n\t  else {\n\t    ++i_lhs[rank+(Rank-RhsRank)];\n\t    break;\n\t  }\n\t}\n      }\n      \n      // Comma operator does the same as \"<<\" operator\n      template <typename T>\n      typename 
enable_if<is_not_expression<T>::value,Allocator&>::type\n      operator,(const T& x) {\n\treturn *this << x;\n      }\n\t\n    protected:\n      // A vector should never complete a row as this indicates it has\n      // been overfilled\n      template <int MyRank>\n      typename enable_if<(MyRank <= 1), void>::type\n      complete_row() {\n\tthrow index_out_of_bounds(\"Row overflow in filling Vector with \\\"<<\\\"\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n\n      // Multi-dimensional arrays: move to next row, checking which\n      // dimensions have been filled\n      template <int MyRank>\n      typename enable_if<(MyRank > 1), void>::type\n      complete_row() {\n\tint next_dim = Rank-2;\n\twhile (next_dim >= 0) {\n\t  if (coords_[next_dim]+obj_size_[next_dim] < size_[next_dim]) {\n\t    //\t    filled_size_[next_dim] += obj_size_[next_dim];\n\t    coords_[next_dim] += obj_size_[next_dim];\n\t    for (int i = next_dim+1; i < Rank; ++i) {\n\t      coords_[i] = 0;\n\t    }\n\t    break;\n\t  }\n\t  --next_dim;\n\t}\n\tif (next_dim < 0) {\n\t  throw index_out_of_bounds(\"Dimension overflow in filling array with \\\"<<\\\"\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\tobj_size_.set_all(0);\n      }\n\n      template <int MyRank>\n      typename enable_if<(MyRank > 1), void>::type\n      partial_copy(const ExpressionSize<MyRank>& from,\n\t\t   ExpressionSize<Rank-1>& to) const {\n\tfor (int i = 0; i < Rank-MyRank; ++i) {\n\t  to[i] = 1;\n\t}\n\tfor (int i = Rank-MyRank; i < Rank-1; ++i) {\n\t  to[i] = from[i+(MyRank-Rank)];\n\t}\n      }\n\n      template <int MyRank>\n      typename enable_if<(MyRank <= 1), void>::type\n      partial_copy(const ExpressionSize<MyRank>& from,\n\t\t   ExpressionSize<Rank-1>& to) const {\n\tto.set_all(1);\n      }\n\n\n    protected:\n      A& array_;\n      const ExpressionSize<Rank> size_;\n      //      ExpressionSize<Rank-1> filled_size_;\n      ExpressionSize<Rank-1> obj_size_;\n      ExpressionSize<Rank> coords_;\n    
  const ExpressionSize<Rank-1> scalar_size_;\n    };\n    \n  }\n\n  // Allow object to be filled with \"A << 1, 2, 3\";\n  template <int Rank, typename T, bool IsActive, typename E>\n  internal::Allocator<Rank,Array<Rank,T,IsActive> > \n  operator<<(Array<Rank,T,IsActive>& array, const E& x) {\n    if (array.empty()) {\n      throw empty_array(\"Attempt to fill empty array with \\\"<<\\\"\"\n\t\t\tADEPT_EXCEPTION_LOCATION);\n    }\n    return internal::Allocator<Rank,Array<Rank,T,IsActive> >(array, x);\n  }\n\n}\n\n\n#endif\n"
  },
  {
    "path": "include/adept/Array.h",
    "content": "/* Array.h -- active or inactive Array of arbitrary rank\n\n    Copyright (C) 2014-2021 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The Array class has functionality modelled on Fortran-90 arrays -\n   they can have a rank up to 7 (above will work, but some forms of\n   indexing these arrays will not work).\n\n*/\n\n#ifndef AdeptArray_H\n#define AdeptArray_H 1\n\n#include <iostream>\n#include <sstream>\n#include <limits>\n#include <string>\n\n#include <adept/base.h>\n\n#ifdef ADEPT_CXX11_FEATURES\n#include <initializer_list>\n#endif\n\n#include <adept/Storage.h>\n#include <adept/Expression.h>\n#include <adept/RangeIndex.h>\n#include <adept/ActiveReference.h>\n#include <adept/ActiveConstReference.h>\n#include <adept/IndexedArray.h>\n#include <adept/where.h>\n#include <adept/noalias.h>\n#include <adept/GradientIndex.h>\n\nnamespace adept {\n\n  enum ArrayPrintStyle {\n    PRINT_STYLE_PLAIN,\n    PRINT_STYLE_CSV,\n    PRINT_STYLE_CURLY,\n    PRINT_STYLE_MATLAB\n  };\n\n  enum MatrixStorageOrder {\n    ROW_MAJOR=0, COL_MAJOR=1\n  };\n\n  // Forward declarations to enable diag_matrix\n  template <typename, class, bool> class SpecialMatrix;\n  namespace internal {\n    template <MatrixStorageOrder, Index, Index> struct BandEngine;\n  }\n\n  // Forward declaration to enable linking at construction and via\n  // link to FixedArray\n  template <typename, bool, Index, Index, Index, Index, Index, Index, Index>\n  class FixedArray;\n\n  namespace internal {\n\n    // -------------------------------------------------------------------\n    // Global variables\n    // -------------------------------------------------------------------\n    // The following global variables affect the behaviour of the\n    // Array class, and are modified using set_*\n\n    // This is \"true\" by default: row-major is the normal C/C++\n    // convention\n    extern bool 
array_row_major_order;\n\n    // When arrays are sent to a stream the dimensions can be grouped\n    // with curly brackets\n    //    extern bool array_print_curly_brackets;\n\n    // Variables describing how arrays are written to a stream\n    extern ArrayPrintStyle array_print_style;\n    extern std::string vector_separator;\n    extern std::string vector_print_before;\n    extern std::string vector_print_after;\n    extern std::string array_opening_bracket;\n    extern std::string array_closing_bracket;\n    extern std::string array_contiguous_separator;\n    extern std::string array_non_contiguous_separator;\n    extern std::string array_print_before;\n    extern std::string array_print_after;\n    extern std::string array_print_empty_before;\n    extern std::string array_print_empty_after;\n    extern bool array_print_indent;\n    extern bool array_print_empty_rank;\n\n    // Forward declaration to enable Array::where()\n    //    template <class A, class B> class Where;\n\n    // -------------------------------------------------------------------\n    // Helper classes\n    // -------------------------------------------------------------------\n\n    // The following are used by expression_string()\n    template <int Rank, bool IsActive>\n    struct array_helper            { const char* name() { return \"Array\";  } };\n    template <int Rank>\n    struct array_helper<Rank,true> { const char* name() { return \"aArray\";  } };\n\n    template <>\n    struct array_helper<1,false>   { const char* name() { return \"Vector\"; } };\n    template <>\n    struct array_helper<1,true>    { const char* name() { return \"aVector\"; } };\n\n    template <>\n    struct array_helper<2,false>   { const char* name() { return \"Matrix\"; } };\n    template <>\n    struct array_helper<2,true>    { const char* name() { return \"aMatrix\"; } };\n\n  } // End namespace internal\n\n\n  // -------------------------------------------------------------------\n  // Definition of Array 
class\n  // -------------------------------------------------------------------\n  template<int Rank, typename Type = Real, bool IsActive = false>\n  class Array\n    : public Expression<Type,Array<Rank,Type,IsActive> >,\n      protected internal::GradientIndex<IsActive> {\n\n  public:\n    // -------------------------------------------------------------------\n    // Array: 1. Static Definitions\n    // -------------------------------------------------------------------\n\n    // The Expression base class needs access to some protected member\n    // functions in section 5\n    friend struct Expression<Type,Array<Rank,Type,IsActive> >;\n\n    // Static definitions to enable the properties of this type of\n    // expression to be discerned at compile time\n    static const bool is_active  = IsActive;\n    static const bool is_lvalue  = true;\n    static const int  rank       = Rank;\n    static const int  n_active   = IsActive * (1 + internal::is_complex<Type>::value);\n    static const int  n_scratch  = 0;\n    static const int  n_arrays   = 1;\n    static const bool is_vectorizable = Packet<Type>::is_vectorized;\n\n    // -------------------------------------------------------------------\n    // Array: 2. 
Constructors\n    // -------------------------------------------------------------------\n    \n    // Initialize an empty array\n    Array() : data_(0), storage_(0), dimensions_(0)\n    { ADEPT_STATIC_ASSERT(!(std::numeric_limits<Type>::is_integer\n\t\t\t    && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS); }\n\n    // Initialize an array with specified size\n    Array(const Index* dims) : storage_(0)\n    { resize(dims); }\n    Array(const ExpressionSize<Rank>& dims) : storage_(0)\n    { resize(dims); }\n\n    // A way to only enable construction if the correct number of\n    // arguments is provided (resize_<x> is only defined for x==Rank)\n    Array(Index m0) : storage_(0) { resize_<1>(m0); }\n    Array(Index m0, Index m1) : storage_(0) { resize_<2>(m0,m1); }\n    Array(Index m0, Index m1, Index m2) : storage_(0) { resize_<3>(m0,m1,m2); }\n    Array(Index m0, Index m1, Index m2, Index m3) : storage_(0) \n    { resize_<4>(m0,m1,m2,m3); }\n    Array(Index m0, Index m1, Index m2, Index m3, Index m4)  : storage_(0)\n    { resize_<5>(m0,m1,m2,m3,m4); }\n    Array(Index m0, Index m1, Index m2, Index m3, Index m4, Index m5)  : storage_(0)\n    { resize_<6>(m0,m1,m2,m3,m4,m5); }\n    Array(Index m0, Index m1, Index m2, Index m3, Index m4, Index m5, Index m6) \n      : storage_(0) \n    { resize_<7>(m0,m1,m2,m3,m4,m5,m6); }\n\n    // A way to directly create arrays, needed when subsetting\n    // other arrays\n    Array(Type* data, Storage<Type>* s, const ExpressionSize<Rank>& dims,\n\t  const ExpressionSize<Rank>& offset)\n      : data_(data), storage_(s), dimensions_(dims), offset_(offset) { \n      if (storage_) {\n\tstorage_->add_link(); \n\tinternal::GradientIndex<IsActive>::set(data_, storage_);\n      }\n      else {\n\t// Active arrays need a gradient index so it is an error for\n\t// them to get to this point\n\tinternal::GradientIndex<IsActive>::assert_inactive();\n      }\n    }\n\n    // Similar to the above, but with the gradient index supplied 
explicitly,
    // needed when an active FixedArray is being sliced, which
    // produces an active Array
    // NOTE(review): data0 is taken as const but stored via a
    // const_cast; presumably callers (FixedArray slicing) guarantee
    // the underlying data is genuinely mutable - confirm at call
    // sites.  storage_ is set to null so this Array never attempts
    // to deallocate the data it points into.
    Array(const Type* data0, Index data_offset, const ExpressionSize<Rank>& dims,
	  const ExpressionSize<Rank>& offset, Index gradient_index0)
      : internal::GradientIndex<IsActive>(gradient_index0, data_offset),
	data_(const_cast<Type*>(data0)+data_offset), storage_(0), dimensions_(dims), offset_(offset) { }

    // Initialize an array pointing at existing data: the fact that
    // storage_ is a null pointer is used to convey the information
    // that it is not necessary to deallocate the data when this array
    // is destructed
    Array(Type* data, const ExpressionSize<Rank>& dims)
      : data_(data), storage_(0), dimensions_(dims) {
      // Compile-time guard: no gradient index is supplied here, so an
      // active array must not be constructed via this route
      ADEPT_STATIC_ASSERT(!IsActive, CANNOT_CONSTRUCT_ACTIVE_ARRAY_WITHOUT_GRADIENT_INDEX);
      // Active arrays need a gradient index so it is an error for
      // them to get to this point
      internal::GradientIndex<IsActive>::assert_inactive();
      pack_contiguous_(); 
    }

    // Copy constructor: links to the source data rather than copying
    // it.  
This is needed because we want a function returning an\n    // Array not to make a deep copy, but rather to perform a\n    // (computationally cheaper) shallow copy; when the Array within\n    // the function is destructed, it will remove its link to the\n    // data, and the responsibility for deallocating the data will\n    // then pass to the Array in the calling function.\n    Array(Array& rhs) \n      : internal::GradientIndex<IsActive>(rhs.gradient_index()), \n\tdata_(rhs.data()), storage_(rhs.storage()), \n\tdimensions_(rhs.dimensions()), offset_(rhs.offset())\n    {\n      if (storage_) storage_->add_link(); \n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running constructor Array(Array&)\\n\";\n#endif\n    }\n\n    // Copy constructor with const argument does exactly the same\n    // thing\n    Array(const Array& rhs) \n      : internal::GradientIndex<IsActive>(rhs.gradient_index()),\n\tdimensions_(rhs.dimensions()), offset_(rhs.offset())\n    { \n      link_(const_cast<Array&>(rhs));\n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running constructor Array(const Array&)\\n\";\n#endif\n    }\n  private:\n    void link_(Array& rhs) {\n      data_ = const_cast<Type*>(rhs.data()); \n      storage_ = const_cast<Storage<Type>*>(rhs.storage());\n      if (storage_) storage_->add_link();\n    }\n\n  public:\n\n    // Initialize with an expression on the right hand side by\n    // evaluating the expression, requiring the ranks to be equal.\n    // Note that this constructor enables expressions to be used as\n    // arguments to functions that expect an array - to prevent this\n    // implicit conversion, use the \"explicit\" keyword.\n    template<typename EType, class E>\n    Array(const Expression<EType, E>& rhs,\n\t  typename internal::enable_if<E::rank == Rank && (Rank > 0),int>::type = 0)\n      : data_(0), storage_(0), dimensions_(0)\n    {\n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running constructor Array(const Expression&), 
implemented by assignment\\n\";\n#endif\n      *this = rhs; \n    }\n\n#ifdef ADEPT_CXX11_FEATURES\n    // Initialize from initializer list\n    template <typename T>\n    Array(std::initializer_list<T> list) : data_(0), storage_(0), dimensions_(0) {\n      *this = list;\n    }\n\n    // The unfortunate restrictions on initializer_list constructors\n    // mean that each possible Array rank needs explicit treatment\n    template <typename T>\n    Array(std::initializer_list<\n\t  std::initializer_list<T> > list)\n      : data_(0), storage_(0), dimensions_(0) { *this = list; }\n\n    template <typename T>\n    Array(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > list)\n      : data_(0), storage_(0), dimensions_(0) { *this = list; }\n\n    template <typename T>\n    Array(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > list)\n      : data_(0), storage_(0), dimensions_(0) { *this = list; }\n\n    template <typename T>\n    Array(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > > list)\n      : data_(0), storage_(0), dimensions_(0) { *this = list; }\n\n    template <typename T>\n    Array(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > > > list)\n      : data_(0), storage_(0), dimensions_(0) { *this = list; }\n\n    template <typename T>\n    Array(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > > > > list)\n      : data_(0), storage_(0), dimensions_(0) { *this = list; }\n    \n\n#endif\n\n\n    // Destructor: if the data are stored in a Storage object then we\n    // tell it that one fewer object is 
linking to it; if the number
    // of links to it drops to zero, it will destruct itself and
    // deallocate the memory.
    // (storage_ is null when this array wraps external data or is
    // empty, in which case nothing needs deallocating here.)
    ~Array()
    { if (storage_) storage_->remove_link(); }

    // -------------------------------------------------------------------
    // Array: 3. Assignment operators
    // -------------------------------------------------------------------

    // Assignment to another matrix: copy the data...
    // Ideally we would like this to fall back to the operator=(const
    // Expression&) function, but if we don't define a copy assignment
    // operator then C++ will generate a default one :-(
    Array& operator=(const Array& rhs) {
#ifdef ADEPT_VERBOSE_FUNCTIONS
      std::cout << "  running Array::operator=(const Array&), implemented with operator=(const Expression&)\n";
#endif
      // Upcast rhs to its Expression base so that the general
      // expression-assignment overload (with its dimension and alias
      // checking) performs the copy
      return (*this = static_cast<const Expression<Type,Array>&> (rhs));
    }

#ifdef ADEPT_MOVE_SEMANTICS
    // Move assignment: swap contents with the right-hand side when it
    // is provably safe, otherwise fall back to a full element copy
    Array& operator=(Array&& rhs) {
#ifdef ADEPT_VERBOSE_FUNCTIONS
      std::cout << "  running Array::operator=(Array&&)\n";
#endif
      // A fast "swap" operation can be performed only if the present
      // ("this") array is either empty, or its data is contained in a
      // Storage object with only one link to it (corresponding to the
      // present array). We may not perform a swap if its data is not
      // in a Storage object, since it might be linked to another
      // location that is expecting the result of the assignment to
      // change the data in that location. 
We also require that the\n      // RHS data would otherwise be lost (but it is not clear that\n      // this is necessary).\n      if ((empty() || (storage_ && storage_->n_links() == 1))\n\t  && (!rhs.storage() || rhs.storage()->n_links() == 1)) {\n\t// We still need to check that the dimensions match\n\tif (empty() || internal::compatible(dimensions_, rhs.dimensions())) {\n\t  swap(*this, rhs);\n\t}\n\telse {\n\t  std::string str = rhs.expression_string()\n\t    + \" assigned to \" + expression_string_();\n\t  throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      else {\n\t// Need a full copy because other arrays are linked to the\n\t// Storage object\n\t*this = static_cast<const Expression<Type,Array>&> (rhs);\n      }\n      return *this;\n    }\n\n    friend void swap(Array& l, Array& r) noexcept {\n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running swap(Array&,Array&)\\n\";\n#endif\n      Type* tmp_data = l.data_;\n      l.data_ = r.data_;\n      r.data_ = tmp_data;\n      Storage<Type>* tmp_storage = l.storage_;\n      l.storage_ = r.storage_;\n      r.storage_ = tmp_storage;\n      swap(l.dimensions_, r.dimensions_);\n      swap(l.offset_, r.offset_);\n      static_cast<internal::GradientIndex<IsActive>&>(l).swap_value(static_cast<internal::GradientIndex<IsActive>&>(r));\n    }\n\n#endif\n\n\n    // Assignment to an array expression of the same rank\n    template <typename EType, class E>\n    inline //__attribute__((always_inline))\n    typename internal::enable_if<E::rank == Rank, Array&>::type\n    operator=(const Expression<EType,E>&  __restrict rhs) {\n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running Array::operator=(const Expression&)\\n\";\n#endif\n#ifndef ADEPT_NO_DIMENSION_CHECKING\n      ExpressionSize<Rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n  
    else if (empty()) {\n\tresize(dims);\n      }\n      else if (!internal::compatible(dims, dimensions_)) {\n\tstd::string str = \"Expr\";\n\tstr += dims.str() + \" object assigned to \" + expression_string_();\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n#else\n      if (empty()) {\n\tExpressionSize<Rank> dims;\n\tif (!rhs.get_dimensions(dims)) {\n\t  std::string str = \"Array size mismatch in \"\n\t    + rhs.expression_string() + \".\";\n\t  throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n\t}\t\n\tresize(dims);\n      }\n#endif\n      if (!empty()) {\n#ifndef ADEPT_NO_ALIAS_CHECKING\n\t// Check for aliasing first\n\tType const * ptr_begin;\n\tType const * ptr_end;\n\tdata_range(ptr_begin, ptr_end);\n\tif (rhs.is_aliased(ptr_begin, ptr_end)) {\n\t  Array<Rank,Type,IsActive> copy;\n\t  // It would be nice to wrap noalias around rhs, but then\n\t  // this leads to infinite template recursion since the \"=\"\n\t  // operator calls the current function but with a modified\n\t  // expression type. 
perhaps a better way would be to make\n\t  // copy.assign_no_alias(rhs) work.\n\t  copy = rhs;\n\t  assign_expression_<Rank, IsActive, E::is_active>(copy);\n\t}\n\telse {\n#endif\n\t  // Select active/passive version by delegating to a\n\t  // protected function\n\t  // The cast() is needed because assign_expression_ accepts\n\t  // its argument by value\n\t  assign_expression_<Rank, IsActive, E::is_active>(rhs.cast());\n#ifndef ADEPT_NO_ALIAS_CHECKING\n\t}\n#endif\n      }\n      return *this;\n    }\n\n\n    // Assignment to an array expression of the same rank in which the\n    // activeness of the right-hand-side is ignored\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == Rank, Array&>::type\n    assign_inactive(const Expression<EType,E>& rhs) {\n      ExpressionSize<Rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (empty()) {\n\tresize(dims);\n      }\n      else if (!internal::compatible(dims, dimensions_)) {\n\tstd::string str = \"Expr\";\n\tstr += dims.str() + \" object assigned to \" + expression_string_();\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n\n      if (!empty()) {\n\t// Check for aliasing first\n\tType const * ptr_begin;\n\tType const * ptr_end;\n\tdata_range(ptr_begin, ptr_end);\n\tif (rhs.is_aliased(ptr_begin, ptr_end)) {\n\t  Array<Rank,Type,IsActive> copy;\n\t  copy.assign_inactive(rhs);\n\t  //\t  *this = copy;\n\t  assign_expression_<Rank, IsActive, false>(copy);\n\t}\n\telse {\n\t  assign_expression_<Rank, IsActive, false>(rhs.cast());\n\t}\n      }\n      return *this;\n    }\n\n    // Assignment to a single value copies to every element\n    template <typename RType>\n    typename internal::enable_if<internal::is_not_expression<RType>::value\n                       // FIX\n                       || 
internal::is_active<Type>::value\n\t\t       , Array&>::type\n    operator=(RType rhs) {\n      if (!empty()) {\n\tassign_inactive_scalar_<Rank,IsActive>(rhs);\n      }\n      return *this;\n    }\n\n    // Assign active scalar expression to an active array by first\n    // converting the RHS to an active scalar\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == 0 && (Rank > 0) && IsActive && !E::is_lvalue,\n      Array&>::type\n    operator=(const Expression<EType,E>& rhs) {\n      Active<EType> x = rhs;\n      *this = x;\n      return *this;\n    }\n\n    // Assign an active scalar to an active array\n    template <typename PType>\n    // FIX\n    typename internal::enable_if<!internal::is_active<PType>::value && IsActive, Array&>::type\n    //    Array& \n    operator=(const Active<PType>& rhs) {\n      ADEPT_STATIC_ASSERT(IsActive, ATTEMPT_TO_ASSIGN_ACTIVE_SCALAR_TO_INACTIVE_ARRAY);\n      if (!empty()) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n\tif (!ADEPT_ACTIVE_STACK->is_recording()) {\n\t  assign_inactive_scalar_<Rank,false>(rhs.scalar_value());\n\t  return *this;\n\t}\n#endif\n\tExpressionSize<Rank> i(0);\n\tIndex index = 0;\n\tint my_rank;\n\tstatic const int last = Rank-1;\n\t// In case PType != Type we make a local copy to minimize type\n\t// conversions\n\tType val = rhs.scalar_value();\n\t\n\tADEPT_ACTIVE_STACK->check_space(size());\n\tdo {\n\t  i[last] = 0;\n\t  // Innermost loop\n\t  for ( ; i[last] < dimensions_[last]; ++i[last],\n\t\t  index += offset_[last]) {\n\t    data_[index] = val;\n\t    ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index());\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index);\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n      return *this;\n    }\n\n#define ADEPT_DEFINE_OPERATOR(OPERATOR, OPSYMBOL)\t\t\\\n    template <class RType>\t\t\t\t\\\n    Array& OPERATOR(const RType& rhs) {\t\t\t\\\n      return *this = noalias(*this OPSYMBOL rhs);\t\\\n    
}
    // Compound assignment is implemented as "*this = noalias(*this OP rhs)".
    // NOTE(review): noalias suppresses the alias check even though
    // *this appears on both sides; presumably this is valid because
    // each output element depends only on the corresponding input
    // element - confirm against the noalias documentation.
    ADEPT_DEFINE_OPERATOR(operator+=, +)
    ADEPT_DEFINE_OPERATOR(operator-=, -)
    ADEPT_DEFINE_OPERATOR(operator*=, *)
    ADEPT_DEFINE_OPERATOR(operator/=, /)
  //    ADEPT_DEFINE_OPERATOR(operator&=, &);
  //    ADEPT_DEFINE_OPERATOR(operator|=, |);
#undef ADEPT_DEFINE_OPERATOR

    // Enable the A.where(B) = C construct.

    // Firstly implement the A.where(B) to return a "Where<A,B>" object;
    // bool_expr must be a boolean expression of the same rank, and
    // (unless ADEPT_NO_DIMENSION_CHECKING is defined) of exactly the
    // same dimensions as this array
    template <class B>
    typename internal::enable_if<B::rank == Rank, internal::Where<Array,B> >::type
    where(const Expression<bool,B>& bool_expr) {
#ifndef ADEPT_NO_DIMENSION_CHECKING
      ExpressionSize<Rank> dims;
      if (!bool_expr.get_dimensions(dims)) {
	// The boolean expression's own dimensions are inconsistent
	std::string str = "Array size mismatch in "
	  + bool_expr.expression_string() + ".";
	throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
      }
      else if (dims != dimensions_) {
	throw size_mismatch("Boolean expression of different size"
			    ADEPT_EXCEPTION_LOCATION);
      }
#endif
      return internal::Where<Array,B>(*this, bool_expr.cast());
    }
    
    // When Where<A,B> = C is invoked, it calls
    // A.assign_conditional(B,C). 
This is implemented separately for\n    // the case when C is an inactive scalar and when it is an array\n    // expression.\n    template <class B, typename C>\n    typename internal::enable_if<internal::is_not_expression<C>::value, void>::type\n    assign_conditional(const Expression<bool,B>& bool_expr,\n\t\t\t    C rhs) {\n      if (!empty()) {\n\tassign_conditional_inactive_scalar_<IsActive>(bool_expr, rhs);\n      }\n    }\n\n    template <class B, typename T, class C>\n    void assign_conditional(const Expression<bool,B>& bool_expr,\n\t\t\t    const Expression<T,C>& rhs) {\n      // Assume size of bool_expr already checked\n#ifndef ADEPT_NO_DIMENSION_CHECKING\n      ExpressionSize<Rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (!internal::compatible(dims,dimensions_)) {\n\tthrow size_mismatch(\"Right-hand-side of \\\"where\\\" construct of incompatible size\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n#endif\n      // Check for aliasing first\n      Type const * ptr_begin;\n      Type const * ptr_end;\n      data_range(ptr_begin, ptr_end);\n      if (rhs.is_aliased(ptr_begin, ptr_end)) {\n\tArray<Rank,Type,IsActive> copy;\n\tcopy = rhs;\n\tassign_conditional_<IsActive>(bool_expr.cast(), copy);\n      }\n      else {\n\t// Select active/passive version by delegating to a\n\t// protected function\n\tassign_conditional_<IsActive>(bool_expr.cast(), rhs.cast());\n      }\n      //      return *this;\n    }\n\n#ifdef ADEPT_CXX11_FEATURES\n    // Assignment of an Array to an initializer list; the first ought\n    // to only work for Vectors\n    template <typename T>\n    typename internal::enable_if<std::is_convertible<T,Type>::value, Array&>::type\n    operator=(std::initializer_list<T> list) {\n      ADEPT_STATIC_ASSERT(Rank==1,RANK_MISMATCH_IN_INITIALIZER_LIST);\n\n      if (empty()) 
{\n\tresize(list.size());\n      }\n      else if (list.size() > static_cast<std::size_t>(dimensions_[0])) {\n\tthrow size_mismatch(\"Initializer list is larger than Vector in assignment\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      // Zero the whole array first in order that automatic\n      // differentiation works\n      *this = 0;\n      Index index = 0;\n      for (auto i = std::begin(list); i < std::end(list); ++i,\n\t   ++index) {\n\tdata_[index*offset_[0]] = *i;\t\n      }\n      return *this;\n    }\n\n    // Assignment of a higher rank Array to a list of lists...\n    template <class IType>\n    Array& operator=(std::initializer_list<std::initializer_list<IType> > list) {\n      ADEPT_STATIC_ASSERT(Rank==internal::initializer_list_rank<IType>::value+2,\n      \t\t\t  RANK_MISMATCH_IN_INITIALIZER_LIST);\n      if (empty()) {\n\tIndex dims[ADEPT_MAX_ARRAY_DIMENSIONS];\n\tint ndims = 0;\n\tshape_initializer_list_(list, dims, ndims);\n\tresize(dims);\n      }\n      else if (list.size() > static_cast<std::size_t>(dimensions_[0])) {\n\tthrow size_mismatch(\"Multi-dimensional initializer list larger than slowest-varying dimension of Array\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      Index index = 0;\n      for (auto i = std::begin(list); i < std::end(list); ++i,\n\t   ++index) {\n\t(*this)[index] = *i;\n      }\n      return *this;\n    }\n\n\n  protected:\n    template <typename T>\n    typename internal::enable_if<std::is_convertible<T,Type>::value>::type\n    shape_initializer_list_(std::initializer_list<T> list,\n\t\t\t    Index* dims, int& ndims) const {\n      dims[ndims] = list.size();\n      ndims++;\n    }\n    template <class IType>\n    void\n    shape_initializer_list_(std::initializer_list<std::initializer_list<IType> > list,\n\t\t\t    Index* dims, int& ndims) const {\n      dims[ndims] = list.size();\n      ndims++;\n      shape_initializer_list_(*(list.begin()), dims, ndims);\n    }\n\n\n  public:\n\n#endif\n\n\n  \n    // 
-------------------------------------------------------------------
    // Array: 4. Access functions, particularly operator()
    // -------------------------------------------------------------------
  
    // Get l-value of the element at the specified coordinates; the
    // return type is Type& for inactive arrays and
    // ActiveReference<Type> for active ones (see the get_lvalue_
    // overloads below)
    typename internal::active_reference<Type,IsActive>::type
    get_lvalue(const ExpressionSize<Rank>& i) {
      return get_lvalue_<IsActive>(index_(i));
    }
    
    // Get r-value of the element at the specified coordinates; for
    // active arrays this is an Active<Type> carrying the value and
    // its gradient index
    typename internal::active_scalar<Type,IsActive>::type
    get_rvalue(const ExpressionSize<Rank>& i) const {
      return get_rvalue_<IsActive>(index_(i));
    }

  protected:
    // Each of the following pairs of overloads is selected at compile
    // time via enable_if on the MyIsActive template argument

    // Active case: wrap the element together with its gradient index
    template <bool MyIsActive>
    typename internal::enable_if<MyIsActive, ActiveReference<Type> >::type
    get_lvalue_(const Index& loc) {
      return ActiveReference<Type>(data_[loc], gradient_index()+loc);
    }
    // Inactive case: plain reference to the element
    template <bool MyIsActive>
    typename internal::enable_if<!MyIsActive, Type&>::type
    get_lvalue_(const Index& loc) {
      return data_[loc];
    }

    // Active case: value plus gradient index packaged as Active<Type>
    template <bool MyIsActive>
    typename internal::enable_if<MyIsActive, Active<Type> >::type
    get_rvalue_(const Index& loc) const {
      return Active<Type>(data_[loc], gradient_index()+loc);
    }
    // Inactive case: const reference to the element
    template <bool MyIsActive>
    typename internal::enable_if<!MyIsActive, const Type&>::type
    get_rvalue_(const Index& loc) const {
      return data_[loc];
    }

  public:
    // Get a constant reference to the element at the specified
    // location, ignoring whether it is active or not
    //    const Type& get(const ExpressionSize<Rank>& i) const {
    //      return data_[index_(i)];
    //    }

    // The following provide a way to access individual elements of
    // the array.  There must be the same number of arguments to
    // operator() as the rank of the array.  Each argument must be of
    // integer type, or a rank-0 expression of integer type (such as
    // "end" or "end-3"). 
Inactive arrays return a reference to the\n    // element, while active arrays return an ActiveReference<Type>\n    // object.  Up to 7 dimensions are supported.\n\n    // l-value access to inactive array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, Type&>::type\n    operator()(I0 i0) \n    { return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]]; }\n\n    // r-value access to inactive array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, const Type&>::type\n    operator()(I0 i0) const\n    { return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]]; }\n\n    // l-value access to inactive array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, Type&>::type\n    operator[](I0 i0) \n    { return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]]; }\n\n    // r-value access to inactive array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, const Type&>::type\n    operator[](I0 i0) const\n    { return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]]; }\n\n  protected:\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive,Type&>::type\n    get_scalar_reference(const Index& offset)\n    { return data_[offset]; }\n\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive,const Type&>::type\n    get_scalar_reference(const Index& offset) const\n    { return data_[offset]; }\n\n    template <bool MyIsActive>\n    typename internal::enable_if<MyIsActive,ActiveReference<Type> >::type\n    get_scalar_reference(const Index& offset) \n    { return 
ActiveReference<Type>(data_[offset], gradient_index()+offset); }\n    template <bool MyIsActive>\n    typename internal::enable_if<MyIsActive,ActiveConstReference<Type> >::type\n    get_scalar_reference(const Index& offset) const\n    { return ActiveConstReference<Type>(data_[offset], gradient_index()+offset); }\n\n  public:\n\n    // l-value access to active array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveReference<Type> >::type\n    operator()(I0 i0) {\n      Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];\n      return ActiveReference<Type>(data_[offset], gradient_index()+offset);\n    }\n    \n    // r-value access to active array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveConstReference<Type> >::type\n    operator()(I0 i0) const {\n      Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];\n      return ActiveConstReference<Type>(data_[offset], gradient_index()+offset);\n    }\n\n    // l-value access to active array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveReference<Type> >::type\n    operator[](I0 i0) {\n      Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];\n      return ActiveReference<Type>(data_[offset], gradient_index()+offset);\n    }\n    \n    // r-value access to active array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<Rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveConstReference<Type> >::type\n    operator[](I0 i0) const {\n      Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];\n      
return ActiveConstReference<Type>(data_[offset], gradient_index()+offset);\n    }\n    \n    // 2D array l-value and r-value access\n    template <typename I0, typename I1>\n    typename internal::enable_if<Rank==2 && internal::all_scalar_ints<2,I0,I1>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1) {\n      return get_scalar_reference<IsActive>(\n\t\t    internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t  + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]);\n    }\n    template <typename I0, typename I1>\n    typename internal::enable_if<Rank==2 && internal::all_scalar_ints<2,I0,I1>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1) const {\n      return get_scalar_reference<IsActive>(\n\t\t    internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t  + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]);\n    }\n\n    // 3D array l-value and r-value access\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<Rank==3 && internal::all_scalar_ints<3,I0,I1,I2>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2) {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]);\n    }\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<Rank==3 && internal::all_scalar_ints<3,I0,I1,I2>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2) const {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + 
internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]);\n    }\n\n    // 4D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<Rank==4 && internal::all_scalar_ints<4,I0,I1,I2,I3>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]);\n    }\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<Rank==4 && internal::all_scalar_ints<4,I0,I1,I2,I3>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) const {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]);\n    }\n\n    // 5D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<Rank==5 && internal::all_scalar_ints<5,I0,I1,I2,I3,I4>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + 
internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]\n\t\t   + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]);\n    }\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<Rank==5 && internal::all_scalar_ints<5,I0,I1,I2,I3,I4>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]\n\t\t   + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]);\n    }\n\n    // 6D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<Rank==6 && internal::all_scalar_ints<6,I0,I1,I2,I3,I4,I5>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]\n\t\t   + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]\n\t\t   + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]);\n    }\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<Rank==6 && internal::all_scalar_ints<6,I0,I1,I2,I3,I4,I5>::value,\n\t\t       
typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]\n\t\t   + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]\n\t\t   + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]);\n    }\n\n    // 7D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<Rank==7 && internal::all_scalar_ints<7,I0,I1,I2,I3,I4,I5,I6>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) {\n      return get_scalar_reference<IsActive>(\n\t\t     internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]\n\t\t   + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]\n\t\t   + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]\n\t\t   + internal::get_index_with_len(i6,dimensions_[6])*offset_[6]);\n    }\n     template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<Rank==7 && internal::all_scalar_ints<7,I0,I1,I2,I3,I4,I5,I6>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const {\n      return get_scalar_reference<IsActive>(\n\t\t     
internal::get_index_with_len(i0,dimensions_[0])*offset_[0]\n\t\t   + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]\n\t\t   + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]\n\t\t   + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]\n\t\t   + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]\n\t\t   + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]\n\t\t   + internal::get_index_with_len(i6,dimensions_[6])*offset_[6]);\n    }\n   \n\n    // The following define the case when operator() is called and one\n    // of the arguments is a \"range\" object (an object that describes\n    // a range of indices that are either contiguous or separated by a\n    // fixed stride), while all others are of integer type (or a\n    // rank-0 expression of integer type). An array object is returned\n    // with a rank that may be reduced from that of the original\n    // array, by one for each dimension that was indexed by an\n    // integer. The new array points to a subset of the original data,\n    // so modifying it will modify the original array.\n\n    // First the case of a vector where we know the argument must be a\n    // \"range\" object\n    template <typename I0>\n    typename internal::enable_if<internal::is_ranged<Rank,I0>::value,\n\t\t       Array<1,Type,IsActive> >::type\n    operator()(I0 i0) {\n      ExpressionSize<1> new_dim((i0.end(dimensions_[0])\n\t\t\t\t + i0.stride(dimensions_[0])\n\t\t\t\t -i0.begin(dimensions_[0]))\n\t\t\t\t/i0.stride(dimensions_[0]));\n      ExpressionSize<1> new_offset(i0.stride(dimensions_[0])*offset_[0]);\n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running Array::operator()(RANGED)\\n\";\n#endif\n      return Array<1,Type,IsActive>(data_ + i0.begin(dimensions_[0])*offset_[0],\n\tstorage_, new_dim, new_offset);\n    }\n    template <typename I0>\n    typename internal::enable_if<internal::is_ranged<Rank,I0>::value,\n\t\t       const Array<1,Type,IsActive> >::type\n    
operator()(I0 i0) const {\n      ExpressionSize<1> new_dim((i0.end(dimensions_[0])\n\t\t\t\t + i0.stride(dimensions_[0])\n\t\t\t\t -i0.begin(dimensions_[0]))\n\t\t\t\t/i0.stride(dimensions_[0]));\n      ExpressionSize<1> new_offset(i0.stride(dimensions_[0])*offset_[0]);\n#ifdef ADEPT_VERBOSE_FUNCTIONS\n      std::cout << \"  running Array::operator()(RANGED) const\\n\";\n#endif\n      return Array<1,Type,IsActive>(data_ + i0.begin(dimensions_[0])*offset_[0],\n\t\t\t\t    storage_, new_dim, new_offset);\n    }\n\n  private:\n    // For multi-dimensional arrays, we need a helper function\n\n    // Treat the indexing of dimension \"irank\" in the case that the\n    // index is of integer type\n    template <typename T, int NewRank>\n    typename internal::enable_if<internal::is_scalar_int<T>::value, void>::type\n    update_index(const Index& irank, const T& i, Index& inew_rank, Index& ibegin,\n\t\t ExpressionSize<NewRank>& new_dim, \n\t\t ExpressionSize<NewRank>& new_offset) const {\n      ibegin += internal::get_index_with_len(i,dimensions_[irank])*offset_[irank];\n    }\n\n    // Treat the indexing of dimension \"irank\" in the case that the\n    // index is a \"range\" object\n    template <typename T, int NewRank>\n    typename internal::enable_if<internal::is_range<T>::value, void>::type\n    update_index(const Index& irank, const T& i, Index& inew_rank, Index& ibegin,\n\t\t ExpressionSize<NewRank>& new_dim, \n\t\t ExpressionSize<NewRank>& new_offset) const {\n      ibegin += i.begin(dimensions_[irank])*offset_[irank];\n      new_dim[inew_rank]\n      = (i.end(dimensions_[irank])\n\t + i.stride(dimensions_[irank])-i.begin(dimensions_[irank]))\n      / i.stride(dimensions_[irank]);\n      new_offset[inew_rank] = i.stride(dimensions_[irank])*offset_[irank];\n      ++inew_rank;\n    }\n\n  public:\n\n    // Now the individual overloads for each number of arguments, up\n    // to 7, with separate r-value (const) and l-value (non-const)\n    // versions\n    template 
<typename I0, typename I1>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1>::value,\n\t\t       Array<internal::is_ranged<Rank,I0,I1>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1) {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1>::value,\n\t\t       const Array<internal::is_ranged<Rank,I0,I1>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1) const {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2>::value,\n\t       Array<internal::is_ranged<Rank,I0,I1,I2>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2) {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      
update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2>::value,\n\t       const Array<internal::is_ranged<Rank,I0,I1,I2>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2) const {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3>::value,\n       Array<internal::is_ranged<Rank,I0,I1,I2,I3>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename 
internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3>::value,\n       const Array<internal::is_ranged<Rank,I0,I1,I2,I3>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) const {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3,I4>::value,\n       Array<internal::is_ranged<Rank,I0,I1,I2,I3,I4>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3,I4>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n  \n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3,I4>::value,\n       const 
Array<internal::is_ranged<Rank,I0,I1,I2,I3,I4>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3,I4>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n  \n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5>::value,\n       Array<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5>::count,Type,IsActive> >::type\n     operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename 
I5>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5>::value,\n       const Array<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5>::count,Type,IsActive> >::type\n     operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5,I6>::value,\n       Array<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5,I6>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5,I6>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);\n  
    update_index(6, i6, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5,I6>::value,\n       const Array<internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5,I6>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const {\n      static const int new_rank = internal::is_ranged<Rank,I0,I1,I2,I3,I4,I5,I6>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);\n      update_index(6, i6, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_ + ibegin, storage_,\n\t\t\t\t\t   new_dim, new_offset);\n    }\n  \n    // If one or more of the indices is not guaranteed to be monotonic\n    // at compile time then we must return an IndexedArray, now done\n    // for all possible numbers of arguments\n\n    // Indexing a 1D array\n    template <typename I0>\n    typename internal::enable_if<Rank == 1 && internal::is_int_vector<I0>::value\n\t\t       && !internal::is_ranged<Rank,I0>::value,\n\t\t       internal::IndexedArray<Rank,Type,IsActive,Array,I0> >::type\n    operator()(const I0& i0) {\n      return internal::IndexedArray<Rank,Type,IsActive,Array,I0>(*this, i0);\n    }\n    template <typename I0>\n    typename 
internal::enable_if<Rank == 1 && internal::is_int_vector<I0>::value\n\t\t       && !internal::is_ranged<Rank,I0>::value,\n\t\t       const internal::IndexedArray<Rank,Type,IsActive,\n\t\t\t\t\t  Array,I0> >::type\n    operator()(const I0& i0) const {\n      return internal::IndexedArray<Rank,Type,IsActive,\n\t\t\t  Array,I0>(*const_cast<Array*>(this), i0);\n    }\n  \n    // Indexing a 2D array\n    template <typename I0, typename I1>\n    typename internal::enable_if<Rank == 2 && internal::is_irreg_indexed<Rank,I0,I1>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1>::count,\n\t\t\t\t    Type,IsActive,Array,I0,I1> >::type\n    operator()(const I0& i0, const I1& i1) {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1>(*this, i0, i1);\n    }\n    template <typename I0, typename I1>\n    typename internal::enable_if<Rank == 2 && internal::is_irreg_indexed<Rank,I0,I1>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1>::count,\n\t\t\t\t    Type,IsActive,Array,I0,I1> >::type\n    operator()(const I0& i0, const I1& i1) const {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,\n\t\t\t  Array,I0,I1>(*const_cast<Array*>(this), i0, i1);\n    }\n\n    // Indexing a 3D array\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<Rank == 3 && internal::is_irreg_indexed<Rank,I0,I1,I2>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2>::count,\n\t\t\t\t    Type,IsActive,Array,I0,I1,I2> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2) {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,\n\t\t\t  I0,I1,I2>(*this, i0, i1, i2);\n    }\n    
template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<Rank == 3 && internal::is_irreg_indexed<Rank,I0,I1,I2>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<Rank,\n\t\t\t\t\t\t\t   I0,I1,I2>::count,\n\t\t\t\t    Type,IsActive,Array,I0,I1,I2> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2) const {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,\n\t\t\t  I0,I1,I2>(*const_cast<Array*>(this), i0, i1, i2);\n    }\n\n    // Indexing a 4D array\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<Rank == 4 && internal::is_irreg_indexed<Rank,I0,I1,I2,I3>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,I3>::count,\n\t\t\t\t    Type,IsActive,Array,I0,I1,I2,I3> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,\n\t\t\t  I0,I1,I2,I3>(*this, i0, i1, i2, i3);\n    }\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<Rank == 4 && internal::is_irreg_indexed<Rank,I0,I1,I2,I3>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,\n\t\t\t\t\t\t\t   I2,I3>::count,\n\t\t\t\t    Type,IsActive,Array,I0,I1,I2,I3> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,\n\t\t\t  I3>(*const_cast<Array*>(this), i0, i1, i2, i3);\n    }\n\n    // Indexing a 5D array\n    template <typename I0, typename I1, typename I2, typename I3, typename I4>\n    typename 
internal::enable_if<Rank == 5\n\t\t       && internal::is_irreg_indexed<Rank,I0,I1,I2,I3,I4>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,\n\t\t\t\t\t\t     I3,I4>::count,\n\t\t\t    Type,IsActive,Array,I0,I1,I2,I3,I4> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4) {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,I3,\n\t\t\t  I4>(*this, i0, i1, i2, i3, i4);\n    }\n    template <typename I0, typename I1, typename I2, typename I3, typename I4>\n    typename internal::enable_if<Rank == 5\n\t\t       && internal::is_irreg_indexed<Rank,I0,I1,I2,I3,I4>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,\n\t\t\t\t\t\t\t   I3,I4>::count,\n\t\t\t\t  Type,IsActive,Array,I0,I1,I2,I3,I4> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4) const {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,I3,\n\t\t\t  I4>(*const_cast<Array*>(this), i0, i1, i2, i3, i4);\n    }\n\n    // Indexing a 6D array\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5>\n    typename internal::enable_if<Rank == 6\n\t\t       && internal::is_irreg_indexed<Rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t\t   I4,I5>::count,\n\t\t\t  Type,IsActive,Array,I0,I1,I2,I3,I4,I5> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4, const I5& i5) {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5>::count;\n      return 
internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,I3,I4,\n\t\t\t  I5>(*this,i0,i1,i2,i3,i4,i5);\n    }\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5>\n    typename internal::enable_if<Rank == 6\n\t\t       && internal::is_irreg_indexed<Rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t\t   I4,I5>::count,\n\t\t\t  Type,IsActive,Array,I0,I1,I2,I3,I4,I5> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4, const I5& i5) const {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,I3,I4,\n\t\t\t  I5>(*const_cast<Array*>(this),i0,i1,i2,i3,i4,i5);\n    }\n\n    // Indexing a 7D array\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5, typename I6>\n    typename internal::enable_if<Rank == 7\n\t\t       && internal::is_irreg_indexed<Rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t     I4,I5,I6>::count,\n\t\t\t  Type,IsActive,Array,I0,I1,I2,I3,I4,I5,I6> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3,\n\t       const I4& i4, const I5& i5, const I6& i6) {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5,I6>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,I3,I4,I5,\n\t\t\t  I6>(*this,i0,i1,i2,i3,i4,i5,i6);\n    }\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5, typename I6>\n    typename internal::enable_if<Rank == 7\n\t\t       && internal::is_irreg_indexed<Rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       const 
internal::IndexedArray<internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t\t   I4,I5,I6>::count,\n\t\t\t  Type,IsActive,Array,I0,I1,I2,I3,I4,I5,I6> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3,\n\t       const I4& i4, const I5& i5, const I6& i6) const {\n      static const int new_rank = internal::is_irreg_indexed<Rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5,I6>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,Array,I0,I1,I2,I3,I4,I5,\n\t\t\t  I6>(*const_cast<Array*>(this),i0,i1,i2,i3,i4,i5,i6);\n    }\n\n\n    // Provide a C-array-like array access: for a multidimensional\n    // array, operator[](i), where i is of integer type, returns an\n    // array of rank one less than the original array, where the new\n    // array is \"sliced\" at index i of dimension 0.  For a vector,\n    // operator[](i) returns an l-value to the element at i.  Thus for\n    // a 3D array A, A[1][2][3] returns a single element. Note that\n    // this will be slower than A(1,2,3) because each operator[]\n    // creates a new array (although does not copy the data).\n    template <typename T>\n    typename internal::enable_if<internal::is_scalar_int<T>::value && (Rank > 1),\n      Array<Rank-1,Type,IsActive> >::type\n    operator[](T i) {\n      int index = internal::get_index_with_len(i,dimensions_[0])*offset_[0];\n      ExpressionSize<Rank-1> new_dim;\n      ExpressionSize<Rank-1> new_offset;\n      for (int j = 1; j < Rank; ++j) {\n\tnew_dim[j-1] = dimensions_[j];\n\tnew_offset[j-1] = offset_[j];\n      }\n      return Array<Rank-1,Type,IsActive>(data_ + index,\n\t\t\t\t\t storage_,\n\t\t\t\t\t new_dim, new_offset);\n    }\n\n    // The const version, alas, throws away the constness because we\n    // don't have a way of returning an unmodifiable array\n    template <typename T>\n    typename internal::enable_if<internal::is_scalar_int<T>::value && (Rank > 1),\n      Array<Rank-1,Type,IsActive> >::type\n    operator[](T i) const 
{\n      int index = internal::get_index_with_len(i,dimensions_[0])*offset_[0];\n      ExpressionSize<Rank-1> new_dim;\n      ExpressionSize<Rank-1> new_offset;\n      for (int j = 1; j < Rank; ++j) {\n\tnew_dim[j-1] = dimensions_[j];\n\tnew_offset[j-1] = offset_[j];\n      }\n      return Array<Rank-1,Type,IsActive>(const_cast<Type*>(data_) + index,\n\t\t\t\t\t storage_,\n\t\t\t\t\t new_dim, new_offset);\n    }\n\n\n    // diag_matrix(), where *this is a 1D array, returns a DiagMatrix\n    // containing the data as the diagonal pointing to the original\n    // data, Can be used as an lvalue.\n    SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>, IsActive>\n    diag_matrix();\n\n    Array<1,Type,IsActive>\n    diag_vector(Index offdiag = 0) {\n      ADEPT_STATIC_ASSERT(Rank == 2, DIAG_VECTOR_ONLY_WORKS_ON_SQUARE_MATRICES);\n      if (empty()) {\n\t// Return an empty vector\n\treturn Array<1,Type,IsActive>();\n      }\n      else if (dimensions_[0] != dimensions_[1]) {\n\tthrow invalid_operation(\"diag_vector member function only applicable to square matrices\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      else if (offdiag >= 0) {\n\tIndex new_dim = std::min(dimensions_[0], dimensions_[1]-offdiag);\n\treturn Array<1,Type,IsActive>(data_+offset_[1]*offdiag, storage_, \n\t\t\t\t      ExpressionSize<1>(new_dim),\n\t\t\t\t      ExpressionSize<1>(offset_[0]+offset_[1]));\n      }\n      else {\n\tIndex new_dim = std::min(dimensions_[0]+offdiag, dimensions_[1]);\n\treturn Array<1,Type,IsActive>(data_-offset_[0]*offdiag, storage_, \n\t\t\t\t      ExpressionSize<1>(new_dim),\n\t\t\t\t      ExpressionSize<1>(offset_[0]+offset_[1]));\n      }\n    }\n  \n    Array\n    submatrix_on_diagonal(Index ibegin, Index iend) {\n      ADEPT_STATIC_ASSERT(Rank == 2,\n\t\tSUBMATRIX_ON_DIAGONAL_ONLY_WORKS_ON_SQUARE_MATRICES);\n      if (dimensions_[0] != dimensions_[1]) {\n\tthrow invalid_operation(\"submatrix_on_diagonal member function only applicable to square 
matrices\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      else if (ibegin < 0 || ibegin > iend || iend >= dimensions_[0]) {\n\tthrow index_out_of_bounds(\"Dimensions out of range in submatrix_on_diagonal\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\tIndex len = iend-ibegin+1;\n\tExpressionSize<2> dim(len,len);\n\treturn Array(data_+ibegin*(offset_[0]+offset_[1]),\n\t\t     storage_, dim, offset_);\n      }\n    }\n\n    // For extracting contiguous sections out of an array use the\n    // following. Currently this just indexes each dimension with the\n    // contiguous range(a,b) index, but in future it may be optimized.\n\n    // 1D array subset\n    template <typename B0, typename E0>\n    Array\n    subset(const B0& ibegin0, const E0& iend0) {\n      ADEPT_STATIC_ASSERT(Rank == 1,\n\t\t\t  SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY);\n      return (*this)(range(ibegin0,iend0));\n    }\n    template <typename B0, typename E0>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0) const {\n      ADEPT_STATIC_ASSERT(Rank == 1,\n\t\t\t  SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY);\n      return (*this)(range(ibegin0,iend0));\n    }\n\n    // 2D array subset\n    template <typename B0, typename E0, typename B1, typename E1>\n    Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1) {\n      ADEPT_STATIC_ASSERT(Rank == 2,\n\t\t\t  SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1));\n    }\n    template <typename B0, typename E0, typename B1, typename E1>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t  const B1& ibegin1, const E1& iend1) const {\n      ADEPT_STATIC_ASSERT(Rank == 2,\n\t\t\t  SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1));\n    }\n\n    // 3D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, 
typename E2>\n    Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2) {\n      ADEPT_STATIC_ASSERT(Rank == 3,\n\t\t\t  SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2));\n    }     \n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2) const {\n      ADEPT_STATIC_ASSERT(Rank == 3,\n\t\t\t  SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2));\n    }\n\n    // 4D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3>\n    Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3) {\n      ADEPT_STATIC_ASSERT(Rank == 4,\n\t\t\t  SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3) const {\n      ADEPT_STATIC_ASSERT(Rank == 4,\n\t\t\t  SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3));\n    } \n\n    // 5D array subset\n    template <typename B0, typename E0, typename B1, 
typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4>\n    Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4) {\n      ADEPT_STATIC_ASSERT(Rank == 5,\n\t\t\t  SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4) const {\n      ADEPT_STATIC_ASSERT(Rank == 5,\n\t\t\t  SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4));\n    }\n\n    // 6D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5>\n    Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5) {\n      ADEPT_STATIC_ASSERT(Rank == 6,\n\t\t\t  SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5));\n    }\n   
 template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5) const {\n      ADEPT_STATIC_ASSERT(Rank == 6,\n\t\t\t  SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5));\n    }\n\n    // 7D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5,\n\t      typename B6, typename E6>\n    Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5,\n\t   const B6& ibegin6, const E6& iend6) {\n      ADEPT_STATIC_ASSERT(Rank == 7,\n\t\t\t  SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5),\n\t\t     range(ibegin6,iend6));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5,\n\t      typename B6, typename E6>\n    const Array\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& 
ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5,\n\t   const B6& ibegin6, const E6& iend6) const {\n      ADEPT_STATIC_ASSERT(Rank == 7,\n\t\t\t  SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5),\n\t\t     range(ibegin6,iend6));\n    }\n\n    // -------------------------------------------------------------------\n    // Array: 5. Public member functions\n    // -------------------------------------------------------------------\n  \n    // Link to an existing array of the same rank, type and activeness\n    Array& link(Array& rhs) {\n      if (!rhs.data()) {\n\tthrow empty_array(\"Attempt to link to empty array\"\n\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\tclear();\n\tdata_ = rhs.data();\n\tstorage_ = rhs.storage();\n\tdimensions_.copy(rhs.dimensions());\n\toffset_.copy(rhs.offset());\n\tif (storage_) {\n\t  storage_->add_link();\n\t}\n\tif (IsActive) {\n\t  internal::GradientIndex<IsActive>::set(data_, storage_);\n\t}\n      }\n      return *this;\n    }\n\n    // Fortran-like link syntax A >>= B\n    Array& operator>>=(Array& rhs)\n    { return link(rhs); }  \n\n#ifndef ADEPT_MOVE_SEMANTICS\n    // A common pattern is to link to a subset of another Array,\n    // e.g. vec1.link(vec2(range(2,4))), but the problem is that the\n    // argument to link is a temporary so will not bind to Array&. In\n    // C++98 we therefore need a function taking const Array& and then\n    // cast away the const-ness. 
This has the unfortunate side effect\n    // that a non-const Array can be linked to a const Array.\n    Array&        link(const Array& rhs) { return link(const_cast<Array&>(rhs)); }\n    Array& operator>>=(const Array& rhs) { return link(const_cast<Array&>(rhs)); }\n#else\n    // But in C++11 we can solve this problem and only bind to\n    // temporary non-const Arrays\n    Array&        link(Array&& rhs) { return link(const_cast<Array&>(rhs)); }\n    Array& operator>>=(Array&& rhs) { return link(const_cast<Array&>(rhs)); }\n#endif\n\n    // To prevent linking to an rvalue expression we write a templated\n    // function that will fail to compile\n    template<class E>\n    typename internal::enable_if<!E::is_lvalue,void>::type\n    link(const Expression<Type,E>&) {\n      ADEPT_STATIC_ASSERT(E::is_lvalue, CAN_ONLY_LINK_TO_AN_LVALUE_EXPRESSION);\n    }\n    template<class E>\n    typename internal::enable_if<!E::is_lvalue,void>::type\n    operator>>=(const Expression<Type,E>&) {\n      ADEPT_STATIC_ASSERT(E::is_lvalue, CAN_ONLY_LINK_TO_AN_LVALUE_EXPRESSION);\n    }\n\n    // STL-like size() returns total length of array\n    Index size() const {\n      Index s = 1;\n      for (int i = 0; i < Rank; ++i) {\n\ts *= dimensions_[i];\n      }\n      return s; \n    }\n\n    // Return constant reference to dimensions\n    const ExpressionSize<Rank>& dimensions() const {\n      return dimensions_;\n    }\n\n    bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n      dim = dimensions_;\n      return true;\n    }\n\n    // Return individual dimension - probably deprecate \"dimension\" in\n    // favour of \"size\"\n    Index dimension(int j) const {\n      return dimensions_[j];\n    }\n    Index size(int j) const {\n      return dimensions_[j];\n    }\n\n    // Return individual offset\n    Index offset(int j) const {\n      return offset_[j];\n    }\n\n    // Return constant reference to offsets\n    const ExpressionSize<Rank>& offset() const {\n      return 
offset_;\n    }\n\n    const Index& last_offset() const { return offset_[Rank-1]; }\n\n    // Return true if the array is empty\n    bool empty() const { return (dimensions_[0] == 0); }\n\n    // Return a string describing the array\n    std::string info_string() const {\n      std::stringstream str;\n      str << \"Array<\" << Rank << \">, dim=\" << dimensions_ << \", offset=\" << offset_ << \", data_location=\" << data_;\n      if (IsActive) {\n\tstr << \", gradient_index=\" << gradient_index();\n      }\n      return str.str();\n    }\n\n    // Return a pointer to the start of the data\n    Type* data() { return data_; }\n    const Type* data() const { return data_; }\n    const Type* const_data() const { return data_; }\n\n    // Older style\n    Type* data_pointer() { return data_; }\n    const Type* data_pointer() const { return data_; }\n    const Type* const_data_pointer() const { return data_; }\n\n    // For vectors only, we allow a pointer to be returned to a\n    // specified element\n    Type* data_pointer(Index i) { \n      ADEPT_STATIC_ASSERT(Rank == 1, CAN_ONLY_USE_DATA_POINTER_WITH_INDEX_ON_VECTORS);\n      if (data_) {\n\treturn data_ + offset_[0]*i;\n      }\n      else {\n\treturn 0;\n      }\n    }\n    const Type* const_data_pointer(Index i) const { \n      ADEPT_STATIC_ASSERT(Rank == 1, CAN_ONLY_USE_CONST_DATA_POINTER_WITH_INDEX_ON_VECTORS);\n      if (data_) {\n\treturn data_ + offset_[0]*i;\n      }\n      else {\n\treturn 0;\n      }\n    }\n   \n    // Return a pointer to the storage object\n    Storage<Type>* storage() { return storage_; }\n\n    // Reset the array to its original empty state, removing the link\n    // to the data (which may deallocate the data if it was the only\n    // link) and set the dimensions to zero\n    void clear() {\n      if (storage_) {\n\tstorage_->remove_link();\n\tstorage_ = 0;\n      }\n      data_ = 0;\n      dimensions_.set_all(0);\n      offset_.set_all(0);\n      
internal::GradientIndex<IsActive>::clear();\n    }\n\n    // Resize an array\n    void\n    resize(const Index* dim, bool force_contiguous = false) {\n\n      ADEPT_STATIC_ASSERT(!(std::numeric_limits<Type>::is_integer\n\t    && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS);\n\n      if (storage_) {\n\tstorage_->remove_link();\n\tstorage_ = 0;\n      }\n      // Check requested dimensions\n      for (int i = 0; i < Rank; ++i) {\n\tif (dim[i] < 0) {\n\t  throw invalid_dimension(\"Negative array dimension requested\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n\telse if (dim[i] == 0) {\n\t  // If any of the dimensions is zero, we clear the array\n\t  // completely and all dimensions will be zero\n\t  clear();\n\t  return;\n\t}\n      }\n      dimensions_.copy(dim); // Copy dimensions\n      if (force_contiguous) {\n\tpack_contiguous_();\n      }\n      else {\n\tpack_();\n      }\n      Index data_vol;\n      if (internal::array_row_major_order) {\n\tdata_vol = offset_[0]*dimensions_[0];\n      }\n      else {\n\tdata_vol = size();\n      }\n      storage_ = new Storage<Type>(data_vol, IsActive);\n      data_ = storage_->data();\n      internal::GradientIndex<IsActive>::set(data_, storage_);\n    }\n\n    // Resize with an ExpressionSize object\n    void resize(const ExpressionSize<Rank>& dim) {\n      resize(&dim[0]);\n    }\n\n    // Resize using contiguous storage with an ExpressionSize object\n    void resize_contiguous(const ExpressionSize<Rank>& dim) {\n      resize(&dim[0], true);\n    }\n\n    // Resize specifying order\n    void resize_row_major(const ExpressionSize<Rank>& dim) {\n      resize(&dim[0]);\n      pack_row_major_();\n    }\n    void resize_row_major_contiguous(const ExpressionSize<Rank>& dim) {\n      resize(&dim[0], true);\n      pack_row_major_contiguous_();\n    }\n    void resize_column_major(const ExpressionSize<Rank>& dim) {\n      resize(&dim[0]);\n      pack_column_major_();\n    }\n\n    // Resize with integer arguments\n    void\n 
   resize(Index m0, Index m1=-1, Index m2=-1, Index m3=-1,\n\t   Index m4=-1, Index m5=-1, Index m6=-1) {\n      Index dim[7] = {m0, m1, m2, m3, m4, m5, m6};\n      // Check invalid dimensions\n      for (int i = 0; i < Rank; ++i) {\n\tif (dim[i] < 0) {\n\t  throw invalid_dimension(\"Invalid dimensions in array resize\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      resize(dim);\n    }\n\n    void\n    resize_row_major(Index m0, Index m1=-1, Index m2=-1, Index m3=-1,\n\t   Index m4=-1, Index m5=-1, Index m6=-1) {\n      Index dim[7] = {m0, m1, m2, m3, m4, m5, m6};\n      // Check invalid dimensions\n      for (int i = 0; i < Rank; ++i) {\n\tif (dim[i] < 0) {\n\t  throw invalid_dimension(\"Invalid dimensions in array resize\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      resize_row_major(dim);\n    }\n\n    void\n    resize_column_major(Index m0, Index m1=-1, Index m2=-1, Index m3=-1,\n\t   Index m4=-1, Index m5=-1, Index m6=-1) {\n      Index dim[7] = {m0, m1, m2, m3, m4, m5, m6};\n      // Check invalid dimensions\n      for (int i = 0; i < Rank; ++i) {\n\tif (dim[i] < 0) {\n\t  throw invalid_dimension(\"Invalid dimensions in array resize\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      resize_column_major(dim);\n    }\n\n    // Resize with contiguous storage and integer arguments\n    void\n    resize_contiguous(Index m0, Index m1=-1, Index m2=-1, Index m3=-1,\n\t   Index m4=-1, Index m5=-1, Index m6=-1) {\n      Index dim[7] = {m0, m1, m2, m3, m4, m5, m6};\n      // Check invalid dimensions\n      for (int i = 0; i < Rank; ++i) {\n\tif (dim[i] < 0) {\n\t  throw invalid_dimension(\"Invalid dimensions in array resize\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      resize(dim, true);\n    }\n\n\n  protected:\n    // Initialize with \"MyRank\" explicit dimensions, the function\n    // only being defined if MyRank is equal to the actual Rank of\n    // the Array\n    template <int MyRank>\n    typename 
internal::enable_if<Rank == MyRank,void>::type\n    resize_(Index m0, Index m1=-1, Index m2=-1, Index m3=-1,\n\t   Index m4=-1, Index m5=-1, Index m6=-1) {\n      Index dim[7] = {m0, m1, m2, m3, m4, m5, m6};\n      resize(dim);\n    }\n\n    // Vectorization of arrays of rank>1 is possible provided that the\n    // fastest varying dimension has padding, if necessary, to ensure\n    // alignment\n    template <int ARank>\n    typename internal::enable_if<ARank==1 || ((ARank>1)&&!Packet<Type>::is_vectorized), bool>::type\n    columns_aligned_() const {\n      return true;\n    }\n    template <int ARank>\n    typename internal::enable_if<(ARank>1)&&Packet<Type>::is_vectorized,bool>::type\n    columns_aligned_() const {\n      return offset_[Rank-2] % Packet<Type>::size == 0;\n    }\n\n  public:\n  \n    bool is_aliased_(const Type* mem1, const Type* mem2) const {\n      Type const * ptr_begin;\n      Type const * ptr_end;\n      data_range(ptr_begin, ptr_end);\n      if (ptr_begin <= mem2 && ptr_end >= mem1) {\n\treturn true;\n      }\n      else {\n\treturn false;\n      }\n    }\n    bool all_arrays_contiguous_() const { return offset_[Rank-1] == 1 && columns_aligned_<Rank>(); }\n\n    // Is the first data element aligned to a packet boundary?\n    bool is_aligned_() const {\n      return !(reinterpret_cast<std::size_t>(data_) & Packet<Type>::align_mask);\n      // If we could union data with a uintptr_t object then we could\n      // do the following, but there is no guarantee that uintptr_t\n      // exists :-(\n      //      return !(data_unsigned_int_ & Packet<Type>::align_mask);\n    }\n\n    // Return the number of unaligned elements before reaching the\n    // first element on an alignment boundary, which is in units of\n    // \"n\" Types. 
The first \"%\" argument finds how many elements the\n    // first element is above an alignment boundary; the following bit\n    // then works out how many elements to the next alignment\n    // boundary.\n    template <int n>\n    int alignment_offset_() const {\n      // This is rather slow!\n      return (n - (reinterpret_cast<std::size_t>(reinterpret_cast<void*>(data_))/sizeof(Type))\n\t      % n) % n;\n    }\n\n    Type value_with_len_(const Index& j, const Index& len) const {\n      ADEPT_STATIC_ASSERT(Rank == 1, CANNOT_USE_VALUE_WITH_LEN_ON_ARRAY_OF_RANK_OTHER_THAN_1);\n      return data_[j*offset_[0]];\n    }\n\n    std::string expression_string_() const {\n      if (true) {\n\tstd::string a = internal::array_helper<Rank,IsActive>().name();\n\ta += dimensions_.str();\n\treturn a;\n      }\n      else {\n\tstd::stringstream s;\n\tprint(s);\n\treturn s.str();\n      }\n    }\n\n    // The same as operator=(inactive scalar) but does not put\n    // anything on the stack\n    template <typename RType>\n    typename internal::enable_if<internal::is_not_expression<RType>::value, Array&>::type\n    set_value(RType x) {\n      if (!empty()) {\n\tassign_inactive_scalar_<Rank,false>(x);\n      }\n      return *this;\n    }\n  \n\n    // Is the array contiguous in memory?\n    bool is_contiguous() const {\n      Index offset_expected = 1;\n      for (int i = Rank-1; i >= 0; --i) {\n\tif (offset_[i] != offset_expected) {\n\t  return false;\n\t}\n\toffset_expected *= dimensions_[i];\n      }\n      return true;\n    }\n    \n    // Determine whether rows or columns are contiguous in memory and\n    // increasing, needed for calling the BLAS matrix multipliciation\n    // functions; the first can be used to check if the fastest\n    // varying dimension is contiguous, to see if array indexes can be\n    // incremented simply.\n    bool is_row_contiguous() const {\n      //      ADEPT_STATIC_ASSERT(Rank == 2, CANNOT_CHECK_ROW_CONTIGUOUS_IF_NOT_MATRIX);\n      //      
return offset_[1] == 1;\n      if (Rank > 1) {\n\treturn offset_[Rank-1] == 1 && offset_[Rank-2] >= dimensions_[Rank-1];\n      }\n      else {\n\treturn offset_[Rank-1] == 1;\n      }\n    }\n    bool is_column_contiguous() const {\n      ADEPT_STATIC_ASSERT(Rank == 2, CANNOT_CHECK_COLUMN_CONTIGUOUS_IF_NOT_MATRIX);\n      return offset_[0] == 1;\n    }\n\n  public:\n    // Return the gradient index for the first element in the array,\n    // or -1 if not active\n    Index gradient_index() const {\n      //      ADEPT_STATIC_ASSERT(IsActive, CANNOT_ACCESS_GRADIENT_INDEX_OF_INACTIVE_ARRAY);\n      //      return my_gradient_index<IsActive>();\n      return internal::GradientIndex<IsActive>::get();\n    }\n\n    /*\n    std::ostream& print(std::ostream& os) const {\n      if (empty()) {\n\tos << \"(empty \" << Rank << \"-D array)\";\n      }\n      else if (adept::internal::array_print_curly_brackets) {\n\tadept::ExpressionSize<Rank> i(0);\n\tint my_rank = -1;\n\tif (Rank > 1) {\n\t  os << \"\\n\";\n\t}\n\tdo {\n\t  for (int r = 0; r < my_rank+1; r++)\n\t    { os << \" \"; }\n\t  for (int r = my_rank+1; r < Rank; r++)\n\t    { os << \"{\"; }\n\t  for (i[Rank-1] = 0; i[Rank-1] < dimensions_[Rank-1]-1; ++i[Rank-1])\n\t    { os << data_[index_(i)] << \", \"; }\n\t  os << data_[index_(i)];\n\t  my_rank = Rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dimensions_[my_rank]) {\n\t      i[my_rank] = 0;\n\t      os << \"}\";\n\t    }\n\t    else {\n\t      os << \"},\\n\";\n\t      break;\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\tif (Rank > 1) {\n\t  os << \"}\"; // \"}/n\"\n\t}\n\telse {\n\t  os << \"}\";\n\t}\n      }\n      else {\n\tadept::ExpressionSize<Rank> i(0);\n\tint my_rank;\n\tdo {\n\t  for (i[Rank-1] = 0; i[Rank-1] < dimensions_[Rank-1]; ++i[Rank-1]) {\n\t    os << \" \" << data_[index_(i)];\n\t  }\n\t  my_rank = Rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dimensions_[my_rank]) {\n\t      i[my_rank] = 0;\n\t    }\n\t    
else {\n\t      break;\n\t    }\n\t  }\n\t  os << \"\\n\";\n\t} while (my_rank >= 0);\n      }\n      return os;\n    }\n    */\n\n    std::ostream& print(std::ostream& os) const {\n      using namespace internal;\n      if (empty()) {\n\tos << array_print_empty_before;\n\tif (array_print_empty_rank) {\n\t  os << Rank;\n\t}\n\tos << array_print_empty_after;\n      }\n      else if (Rank == 1) {\n\t// Print a vector\n\tos << vector_print_before << data_[0];\n\tfor (int i = 1; i < dimensions_[0]; ++i) {\n\t  os << vector_separator << data_[i*offset_[0]];\n\t}\n\tos << vector_print_after;\n      }\n      else {\n\t// Print a multi-dimensional array\n\tadept::ExpressionSize<Rank> i(0);\n\tint my_rank = -1;\n\tos << array_print_before;\n\tdo {\n\t  if (array_print_indent) {\n\t    if (my_rank >= 0) {\n\t      os << \" \";\n\t      for (int r = 0; r < my_rank*static_cast<int>(array_opening_bracket.size()); r++) {\n\t\tos << \" \";\n\t      }\n\t    }\n\t  }\n\t  if (my_rank == -1) {\n\t    for (int r = 1; r < Rank; r++) {\n\t      os << array_opening_bracket;\n\t    }\n\t  }\n\t  else {\n\t    for (int r = my_rank+1; r < Rank; r++) {\n\t      os << array_opening_bracket;\n\t    }\n\t  }\n\t  for (i[Rank-1] = 0; i[Rank-1] < dimensions_[Rank-1]-1; ++i[Rank-1]) {\n\t    os << data_[index_(i)] << array_contiguous_separator;\n\t  }\n\t  os << data_[index_(i)];\n\t  my_rank = Rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dimensions_[my_rank]) {\n\t      i[my_rank] = 0;\n\t      os << array_closing_bracket;\n\t    }\n\t    else {\n\t      os << array_closing_bracket << array_non_contiguous_separator;\n\t      break;\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\tos << array_print_after;\n      }\n      return os;\n    }\n\n    // Get pointers to the first and last data members in memory.  
\n    void data_range(Type const * &data_begin, Type const * &data_end) const {\n      data_begin = data_;\n      data_end = data_;\n      for (int i = 0; i < Rank; i++) {\n\tif (offset_[i] >= 0) {\n\t  data_end += (dimensions_[i]-1)*offset_[i];\n\t}\n\telse {\n\t  data_begin += (dimensions_[i]-1)*offset_[i];\n\t}\n      }\n    }\n\n  \n    // The Stack::independent(x) and Stack::dependent(y) functions add\n    // the gradient_index of objects x and y to std::vector<uIndex>\n    // objects in Stack. Since x and y may be scalars or arrays, this\n    // is best done by delegating to the Active or Array classes.\n    template <typename IndexType>\n    void push_gradient_indices(std::vector<IndexType>& vec) const {\n      ADEPT_STATIC_ASSERT(IsActive,\n\t\t  CANNOT_PUSH_GRADIENT_INDICES_FOR_INACTIVE_ARRAY); \n      ExpressionSize<Rank> i(0);\n      Index gradient_ind = gradient_index();\n      Index index = 0;\n      int my_rank;\n      vec.reserve(vec.size() + size());\n      do {\n\t// Innermost loop - note that the counter is index, not max_index\n\tfor (Index max_index = index + dimensions_[Rank-1]*offset_[Rank-1];\n\t     index < max_index;\n\t     index += offset_[Rank-1]) {\n\t  vec.push_back(gradient_ind + index);\n\t}\n\t// Increment counters appropriately depending on which\n\t// dimensions have been finished\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    // Return inactive array linked to original data\n    Array<Rank, Type, false> inactive_link() {\n      return Array<Rank, Type, false>(data_, storage_, dimensions_, offset_);\n    }\n\n    // Perform an in-place transpose for 2D arrays only\n    Array& in_place_transpose() {\n      ADEPT_STATIC_ASSERT(Rank == 2, \n\t\t\t  IN_PLACE_TRANSPOSE_ONLY_POSSIBLE_WITH_2D_ARRAYS);\n      Index tmp;\n      // Swap dimensions\n      tmp = dimensions_[0];\n      dimensions_[0] = dimensions_[1];\n      dimensions_[1] = tmp;\n      // Swap offsets\n      tmp = offset_[0];\n      
offset_[0] = offset_[1];\n      offset_[1] = tmp;\n      return *this;\n    }\n\n    // Transpose helper functions\n  protected:\n    template<int MyRank>\n    typename internal::enable_if<MyRank == 2, Array<2,Type,IsActive> >::type\n    my_T() {\n      // Transpose 2D array: create output array initially as link\n      // to input array\n      Array<2,Type,IsActive> out(*this);\n      // Swap dimensions\n      return out.in_place_transpose();\n    }\n    template<int MyRank>\n    typename internal::enable_if<MyRank == 2, const Array<2,Type,IsActive> >::type\n    my_T() const {\n      // Transpose 2D array: create output array initially as link\n      // to input array\n      Array<2,Type,IsActive> out(const_cast<Array&>(*this));\n      // Swap dimensions\n      return out.in_place_transpose();\n    }\n\n  public:\n    // Out-of-place transpose\n    Array<2,Type,IsActive>\n    T() {\n      ADEPT_STATIC_ASSERT(Rank == 1 || Rank == 2, \n\t\t\t  TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS);\n      return my_T<Rank>();\n    }\n    const Array<2,Type,IsActive>\n    T() const {\n      ADEPT_STATIC_ASSERT(Rank == 1 || Rank == 2, \n\t\t\t  TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS);\n      return my_T<Rank>();\n    }\n\n    // \"permute\" is a generalized transpose, returning an Array linked\n    // to the current one but with the dimensions rearranged according\n    // to idim: idim[0] is the 0-based number of the dimension of the\n    // current array that will be dimension 0 of the new array,\n    // idim[1] is the number of the dimension of the current array\n    // that will be dimension 1 of the new array and so on.\n    Array permute(const Index* idim) {\n      if (empty()) {\n\tthrow empty_array(\"Attempt to permute an empty array\"\n\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      ExpressionSize<Rank> new_dims(0);\n      ExpressionSize<Rank> new_offset;\n      for (int i = 0; i < Rank; ++i) {\n\tif (idim[i] >= 0 && idim[i] < Rank) {\n\t  new_dims[i] = 
dimensions_[idim[i]];\n\t  new_offset[i] = offset_[idim[i]];\n\t}\n\telse {\n\t  throw invalid_dimension(\"Dimensions must be in range 0 to Rank-1 in permute\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      for (int i = 0; i < Rank; ++i) {\n\tif (new_dims[i] == 0) {\n\t  throw invalid_dimension(\"Missing dimension in permute\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      return Array(data_, storage_, new_dims, new_offset);\n    }\n\n    Array permute(const ExpressionSize<Rank>& idim) {\n      return permute(&idim[0]);\n    }\n\n    // Up to 7 dimensions we can specify the dimensions as separate\n    // arguments\n    typename internal::enable_if<(Rank < 8), Array>::type\n    permute(Index i0, Index i1, Index i2 = -1, Index i3 = -1, Index i4 = -1,\n\t    Index i5 = -1, Index i6 = -1) {\n      Index idim[7] = {i0, i1, i2, i3, i4, i5, i6};\n      for (int i = 0; i < Rank; ++i) {\n\tif (idim[i] == -1) {\n\t  throw invalid_dimension(\"Incorrect number of dimensions provided to permute\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      return permute(idim);\n    }\n\n    // Only applicable to vectors, return a multi-dimensional array\n    // that links to the data in the vector\n    template <int NewRank>\n    Array<NewRank,Type,IsActive> reshape(const ExpressionSize<NewRank>& dims) {\n      ADEPT_STATIC_ASSERT(Rank == 1, CANNOT_RESHAPE_MULTIDIMENSIONAL_ARRAY);\n      Index new_size = 1;\n      for (int i = 0; i < NewRank; ++i) {\n\tnew_size *= dims[i];\n      }\n      if (new_size != dimensions_[0]) {\n\tthrow invalid_dimension(\"Size of reshaped array does not match original vector\");\n      }\n      ExpressionSize<NewRank> offset;\n      offset[NewRank-1] = offset_[0];\n      for (int i = NewRank-2; i >= 0; --i) {\n\toffset[i] = dims[i+1]*offset[i+1];\n      }\n      return Array<NewRank,Type,IsActive>(data_,storage_,dims,offset);\n    }\n\n    // More convenient interfaces to reshape providing a list of\n    // integer 
dimensions\n    Array<2,Type,IsActive> reshape(Index i0, Index i1)\n    { return reshape(ExpressionSize<2>(i0,i1)); }\n    Array<3,Type,IsActive> reshape(Index i0, Index i1, Index i2)\n    { return reshape(ExpressionSize<3>(i0,i1,i2)); }\n    Array<4,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3)\n    { return reshape(ExpressionSize<4>(i0,i1,i2,i3)); }\n    Array<5,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3, Index i4)\n    { return reshape(ExpressionSize<5>(i0,i1,i2,i3,i4)); }\n    Array<6,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3,\n\t\t\t\t   Index i4, Index i5)\n    { return reshape(ExpressionSize<6>(i0,i1,i2,i3,i4,i5)); }\n    Array<7,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3,\n\t\t\t\t   Index i4, Index i5, Index i6)\n    { return reshape(ExpressionSize<7>(i0,i1,i2,i3,i4,i5,i6)); }\n\n\n    // Return an Array that is a \"soft\" link to the data in the\n    // present array; that is, it does not copy the Storage object and\n    // increase the reference counter therein. 
This is useful in a\n    // multi-threaded environment when multiple threads may wish to\n    // subset the same array.\n    Array soft_link() {\n      return Array(data_,0,dimensions_,offset_,gradient_index());\n    }\n    const Array soft_link() const {\n      return Array(data_,0,dimensions_,offset_,gradient_index());\n    }\n\n\n    // Place gradients associated with the present active array into\n    // the equivalent passive array provided as an argument\n    template <typename MyType>\n    void get_gradient(Array<Rank,MyType,false>& gradient) const {\n      ADEPT_STATIC_ASSERT(IsActive,CANNOT_USE_GET_GRADIENT_ON_INACTIVE_ARRAY);\n      if (gradient.empty()) {\n\tgradient.resize(dimensions_);\n      }\n      else if (gradient.dimensions() != dimensions_) {\n\tthrow size_mismatch(\"Attempt to get_gradient with array of different dimensions\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      static const int last = Rank-1;\n      ExpressionSize<Rank> target_offset = gradient.offset();\n      ExpressionSize<Rank> i(0);\n      Index index = 0;\n      int my_rank;\n      Index index_target = 0;\n      Index last_dim_stretch = dimensions_[last]*offset_[last];\n      MyType* target = gradient.data();\n      do {\n\ti[last] = 0;\n\tindex_target = 0;\n\tfor (int r = 0; r < Rank-1; r++) {\n\t  index_target += i[r]*target_offset[r];\n\t}\n\tADEPT_ACTIVE_STACK->get_gradients(gradient_index()+index,\n\t\t\t\t  gradient_index()+index+last_dim_stretch,\n\t\t\t\t  target+index_target, offset_[last], target_offset[last]);\n\tindex += last_dim_stretch;\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    // Return an inactive array of the same type and rank as the\n    // present active array containing the gradients associated with\n    // it\n    Array<Rank,Type,false> get_gradient() const {\n      Array<Rank,Type,false> gradient;\n      get_gradient(gradient);\n      return gradient;\n    }\n\n\n    // Set gradients associated with the 
present active array to \n    // the equivalent passive array provided as an argument\n    template <typename MyType>\n    void set_gradient(const Array<Rank,MyType,false>& gradient) const {\n      ADEPT_STATIC_ASSERT(IsActive,CANNOT_USE_SET_GRADIENT_ON_INACTIVE_ARRAY);\n      if (gradient.dimensions() != dimensions_) {\n\tthrow size_mismatch(\"Attempt to set_gradient to an array of different dimensions\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      static const int last = Rank-1;\n      ExpressionSize<Rank> src_offset = gradient.offset();\n      ExpressionSize<Rank> i(0);\n      Index index = 0;\n      int my_rank;\n      Index index_src = 0;\n      Index last_dim_stretch = dimensions_[last]*offset_[last];\n      const MyType* src = gradient.data();\n      do {\n\ti[last] = 0;\n\tindex_src = 0;\n\tfor (int r = 0; r < Rank-1; r++) {\n\t  index_src += i[r]*src_offset[r];\n\t}\n\tADEPT_ACTIVE_STACK->set_gradients(gradient_index()+index,\n\t\t\t\t\t  gradient_index()+index+last_dim_stretch,\n\t\t\t\t\t  src+index_src, src_offset[last], offset_[last]);\n\tindex += last_dim_stretch;\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    \n    // std::vector<typename internal::active_scalar<Type,IsActive>::type>\n    // std_vector() const {\n    //   ADEPT_STATIC_ASSERT(Rank == 1, STD_VECTOR_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS);\n    //   std::vector<typename internal::active_scalar<Type,IsActive>::type> data(dimensions_[0]);\n    //   for (Index i = 0; i < dimensions_[0]; ++i) {\n    // \tdata[i] = (*this)(i);\n    //   }\n    //   return data;\n    // }\n\n    void\n    put(std::vector<typename internal::active_scalar<Type,IsActive>::type>& data) const {\n      ADEPT_STATIC_ASSERT(Rank == 1, PUT_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS);\n      if (data.size() != dimensions_[0]) {\n\tdata.resize(dimensions_[0]);\n      }\n      for (Index i = 0; i < dimensions_[0]; ++i) {\n\tdata[i] = (*this)(i);\n      }  \n    }\n\n    void\n    get(const 
std::vector<typename internal::active_scalar<Type,IsActive>::type>& data) {\n      ADEPT_STATIC_ASSERT(Rank == 1, GET_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS);\n      if (data.size() != dimensions_[0]) {\n\tresize(data.size());\n      }\n      for (Index i = 0; i < dimensions_[0]; ++i) {\n\t(*this)(i) = data[i];\n      }  \n    }\n\n\n    // -------------------------------------------------------------------\n    // Array: 6. Member functions accessed by the Expression class\n    // -------------------------------------------------------------------\n\n    template <int MyArrayNum, int NArrays>\n    void set_location_(const ExpressionSize<Rank>& i, \n\t\t       ExpressionSize<NArrays>& index) const {\n      index[MyArrayNum] = index_(i);\n    }\n    \n    template <int MyArrayNum, int NArrays>\n    Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return data_[loc[MyArrayNum]];\n    }\n    template <int MyArrayNum, int NArrays>\n    Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return Packet<Type>(data_+loc[MyArrayNum]);\n    }\n\n    Type& lvalue_at_location(const Index& loc) {\n      return data_[loc];\n    }\n\n    // Return a scalar\n    template <bool IsAligned, int MyArrayNum, typename PacketType,\n\t      int NArrays>\n    typename internal::enable_if<internal::is_same<Type,PacketType>::value, Type>::type\n    values_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return data_[loc[MyArrayNum]];\n    }\n\n    // Return a Paket from an aligned memory address\n    template <bool IsAligned, int MyArrayNum, typename PacketType,\n\t      int NArrays>\n    typename internal::enable_if<IsAligned && internal::is_same<Packet<Type>,PacketType>::value, PacketType>::type\n    values_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return Packet<Type>(data_+loc[MyArrayNum]);\n    }    \n\n    // Return a Paket from an unaligned memory address\n    template <bool IsAligned, int 
MyArrayNum, typename PacketType,\n\t      int NArrays>\n    typename internal::enable_if<!IsAligned && internal::is_same<Packet<Type>,PacketType>::value, PacketType>::type\n    values_at_location_(const ExpressionSize<NArrays>& loc) const {\n      // integer dummy second argument indicates unaligned load\n      return Packet<Type>(data_+loc[MyArrayNum], 0); \n    }    \n\n    // Return a scalar\n    template <bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t      typename PacketType, int NArrays, int NScratch>\n    typename internal::enable_if<internal::is_same<Type,PacketType>::value, Type>::type\n    values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t      internal::ScratchVector<NScratch,PacketType>& scratch) const {\n      return data_[loc[MyArrayNum]];\n    }\n\n    // Return a Paket from an aligned memory address\n    template <bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t      typename PacketType, int NArrays, int NScratch>\n    typename internal::enable_if<IsAligned && internal::is_same<Packet<Type>,PacketType>::value, PacketType>::type\n    values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t      internal::ScratchVector<NScratch,PacketType>& scratch) const {\n      return Packet<Type>(data_+loc[MyArrayNum]);\n    }\n    // Return a Paket from an unaligned memory address\n    template <bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t      typename PacketType, int NArrays, int NScratch>\n    typename internal::enable_if<!IsAligned && internal::is_same<Packet<Type>,PacketType>::value, PacketType>::type\n    values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t      internal::ScratchVector<NScratch,PacketType>& scratch) const {\n      return Packet<Type>(data_+loc[MyArrayNum], 0);\n    }\n   \n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_at_location_store_(const ExpressionSize<NArrays>& 
loc,\n\t\t\t\t  internal::ScratchVector<NScratch>& scratch) const {\n      return data_[loc[MyArrayNum]];\n\n    }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const internal::ScratchVector<NScratch>& scratch) const {\n      return data_[loc[MyArrayNum]];\n    }\n\n    template <int MyArrayNum, int NArrays>\n    void advance_location_(ExpressionSize<NArrays>& loc) const {\n      loc[MyArrayNum] += offset_[Rank-1];\n    }\n\n    // If an expression leads to calc_gradient being called on an\n    // active object, we push the multiplier and the gradient index on\n    // to the operation stack (or 1.0 if no multiplier is specified\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch) const {\n      stack.push_rhs(1.0, gradient_index() + loc[MyArrayNum]);\n    }\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n    void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch,\n\t\t\tconst MyType& multiplier) const {\n      stack.push_rhs(multiplier, gradient_index() + loc[MyArrayNum]);\n    }\n  \n    template <int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t      int NArrays, int NScratch, int NActive>\n    void calc_gradient_packet_(Stack& stack, \n\t\t\t       const ExpressionSize<NArrays>& loc,\n\t\t\t       const internal::ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t       internal::ScratchVector<NActive,Packet<Real> >& gradients) const {\n      stack.push_rhs_indices<Packet<Real>::size,NActive>(gradient_index() + loc[MyArrayNum]);\n      gradients[MyActiveNum] = Packet<Real>(static_cast<Real>(1.0));\n    }\n\n    template <int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t      int 
NArrays, int NScratch, int NActive, typename MyType>\n    void calc_gradient_packet_(Stack& stack, \n\t\t\t       const ExpressionSize<NArrays>& loc,\n\t\t\t       const internal::ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t       internal::ScratchVector<NActive,Packet<Real> >& gradients,\n\t\t\t       const MyType& multiplier) const {\n      stack.push_rhs_indices<Packet<Real>::size,NActive>(gradient_index() + loc[MyArrayNum]);\n      gradients[MyActiveNum] = multiplier;\n    }\n\n\n    // -------------------------------------------------------------------\n    // Array: 7. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n\n    // Set the memory offsets from the array dimensions either\n    // assuming C++-style row-major order, or Fortran-style\n    // column-major order. The pack_() function spaces the data so\n    // that all arrays are aligned to packet boundaries, to facilitate\n    // vectorization.\n    void pack_row_major_() {\n      offset_[Rank-1] = 1;\n      if (Rank > 1) {\n\t// Round up to nearest packet size so that all rows are aligned\n\tif (dimensions_[Rank-1] >= Packet<Type>::size*2) {\n\t  offset_[Rank-2] = ((dimensions_[Rank-1] + Packet<Type>::size - 1) / Packet<Type>::size) * Packet<Type>::size;\n\t}\n\telse {\n\t  offset_[Rank-2] = dimensions_[Rank-1];\n\t}\n\tfor (int i = Rank-3; i >= 0; --i) {\n\t  offset_[i] = dimensions_[i+1]*offset_[i+1];\n\t}\n      }\n    }\n    void pack_column_major_() {\n      offset_[0] = 1;\n      for (int i = 1; i < Rank; ++i) {\n\toffset_[i] = dimensions_[i-1]*offset_[i-1];\n      }\n    }\n    void pack_() {\n      if (internal::array_row_major_order) {\n\tpack_row_major_();\n      }\n      else {\n\tpack_column_major_();\n      }\n    }\n\n    // ...while the pack_contiguous_() function makes sure all data\n    // are contiguous in memory\n    void pack_row_major_contiguous_() {\n      offset_[Rank-1] = 1;\n      for (int i = 
Rank-2; i >= 0; --i) {\n\toffset_[i] = dimensions_[i+1]*offset_[i+1];\n      }\n    }\n\n    void pack_contiguous_() {\n      if (internal::array_row_major_order) {\n\tpack_row_major_contiguous_();\n      }\n      else {\n\tpack_column_major_();\n      }\n    }\n\n    // Return the memory index (relative to data_) for array element\n    // indicated by j\n    Index index_(Index j[Rank]) const {\n      Index o = 0;\n      for (int i = 0; i < Rank; i++) {\n\to += j[i]*offset_[i];\n      }\n      return o;\n    }\n    Index index_(const ExpressionSize<Rank>& j) const {\n      Index o = 0;\n      for (int i = 0; i < Rank; i++) {\n\to += j[i]*offset_[i];\n      }\n      return o;\n    }\n\n    // Used in traversing through an array\n    void advance_index(Index& index, int& rank, ExpressionSize<Rank>& i) const {\n      index -= offset_[Rank-1]*dimensions_[Rank-1];\n      rank = Rank-1;\n      while (--rank >= 0) {\n\tif (++i[rank] >= dimensions_[rank]) {\n\t  i[rank] = 0;\n\t  index -= offset_[rank]*(dimensions_[rank]-1);\n\t}\n\telse {\n\t  index += offset_[rank];\n\t  break;\n\t}\n      }\n    }\n\n    // When assigning a scalar to a whole array, there may be\n    // advantage in specialist behaviour depending on the rank of the\n    // array. 
This is a generic one that copies the number but treats\n    // the present array as passive.\n    template <int LocalRank, bool LocalIsActive, typename X>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_inactive_scalar_(X x) {\n      ExpressionSize<LocalRank> i(0);\n      Index index = 0;\n      int my_rank;\n      do {\n\t// Innermost loop - note that the counter is index, not max_index\n\tfor (Index max_index = index + dimensions_[LocalRank-1]*offset_[LocalRank-1];\n\t     index < max_index;\n\t     index += offset_[LocalRank-1]) {\n\t  data_[index] = x;\n\t}\n\t// Increment counters appropriately depending on which\n\t// dimensions have been finished\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    // An active array being assigned the value of an inactive scalar\n    template <int LocalRank, bool LocalIsActive, typename X>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_inactive_scalar_(X x) {\n      // If not recording we call the inactive version instead\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (! 
ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_inactive_scalar_<LocalRank, false, X>(x);\n\treturn;\n      }\n#endif\n\n      ExpressionSize<LocalRank> i(0);\n      Index gradient_ind = gradient_index();\n      Index index = 0;\n      int my_rank;\n      do {\n\t// Innermost loop\n\tADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimensions_[LocalRank-1],\n\t\t\t\t\t   offset_[LocalRank-1]);\n\tfor (Index max_index = index + dimensions_[LocalRank-1]*offset_[LocalRank-1];\n\t     index < max_index; index += offset_[LocalRank-1]) {\n\t  data_[index] = x;\n\t}\n\n\t// Increment counters appropriately depending on which\n\t// dimensions have been finished\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n    // When copying an expression to a whole array, there may be\n    // advantage in specialist behaviour depending on the rank of the\n    // array\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    inline\n    typename internal::enable_if<!LocalIsActive && (!internal::expr_cast<E>::is_vectorizable\n\t\t\t\t\t  || !internal::is_same<typename E::type,Type>::value),void>::type\n    assign_expression_(const E& rhs) {\n      ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY);\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = LocalRank-1;\n      // FIX!!!\n      if (false) { //rhs.all_arrays_contiguous()) {\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, ind);\n\t  // Innermost loop\n\t  for ( ; i[last] < dimensions_[last]; ++i[last],\n\t\t  index += offset_[last]) {\n\t    // Note that this is faster as we know that all indices\n\t    // need to be incremented by 1\n\t    data_[index] = rhs.next_value_contiguous(ind);\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n      else {\n\tdo {\n\t  i[last] = 0;\n\t  
rhs.set_location(i, ind);\n\t  // Innermost loop\n\t  for ( ; i[last] < dimensions_[last]; ++i[last],\n\t\t  index += offset_[last]) {\n\t    data_[index] = rhs.next_value(ind);\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n    }\n\n    // Vectorized version for Rank-1 arrays\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    inline //__attribute__((always_inline))\n    typename internal::enable_if<!LocalIsActive && internal::expr_cast<E>::is_vectorizable && LocalRank == 1\n\t\t       && internal::is_same<typename E::type,Type>::value,void>::type\n      // Removing the reference speeds things up because otherwise E\n      // is dereferenced each loop\n      //  assign_expression_(const E& __restrict rhs) {\n      assign_expression_(const E rhs) {\n      ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY);\n      ExpressionSize<1> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n\n      if (dimensions_[0] >= Packet<Type>::size*2\n\t  && offset_[0] == 1\n\t  && rhs.all_arrays_contiguous()\n\t  ) {\n\t// Contiguous source and destination data\n\tIndex istartvec = 0;\n\tIndex iendvec = 0;\n\n\tistartvec = rhs.alignment_offset();\n\tif (istartvec < 0 || istartvec != alignment_offset_<Packet<Type>::size>()) {\n\t  istartvec = iendvec = 0;\n\t}\n\telse  {\n\t  // Adjust iendvec such that iendvec-istartvec is a multiple\n\t  // of the packet size\n\t  iendvec = (dimensions_[0]-istartvec);\n\t  iendvec -= (iendvec % Packet<Type>::size);\n\t  iendvec += istartvec;\n\t}\n\ti[0] = 0;\n\trhs.set_location(i, ind);\n\tType* const __restrict t = data_; // Avoids an unnecessary load for some reason\n\t// Innermost loop\n\tfor (int index = 0; index < istartvec; ++index) {\n\t  // Scalar version\n\t  t[index] = rhs.next_value_contiguous(ind);\n\t}\n\tfor (int index = istartvec ; index < iendvec;\n\t     index += Packet<Type>::size) {\n\t  // Vectorized version\n\t  
//\t    rhs.next_packet(ind).put(data_+index)\n\t  // FIX may need unaligned store\n\t  rhs.next_packet(ind).put(t+index);\n\t}\n\tfor (int index = iendvec ; index < dimensions_[0]; ++index) {\n\t  // Scalar version\n\t  t[index] = rhs.next_value_contiguous(ind);\n\t}\n      }\n      else {\n\t// Non-contiguous source or destination data\n\ti[0] = 0;\n\trhs.set_location(i, ind);\n\tType* const __restrict t = data_; // Avoids an unnecessary load for some reason\n\tfor (int index = 0; i[0] < dimensions_[0]; ++i[0],\n\t       index += offset_[0]) {\n\t  t[index] = rhs.next_value(ind);\n\t}\n      }\n    }\n\n    // Vectorized version\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    inline\n    typename internal::enable_if<!LocalIsActive && internal::expr_cast<E>::is_vectorizable && (LocalRank > 1)\n                       && internal::is_same<typename E::type,Type>::value,void>::type\n    // Removing the reference speeds things up because otherwise E\n    // is dereferenced each loop\n    //  assign_expression_(const E& rhs) \n      assign_expression_(const E rhs) {\n      ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY);\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = LocalRank-1;\n      \n      if (dimensions_[last] >= Packet<Type>::size*2\n\t  && all_arrays_contiguous_()\n\t  && rhs.all_arrays_contiguous()) {\n\t// Contiguous source and destination data\n\tint iendvec;\n\tint istartvec = rhs.alignment_offset();\n\tif (istartvec < 0 || istartvec != alignment_offset_<Packet<Type>::size>()) {\n\t  istartvec = iendvec = 0;\n\t}\n\telse {\n\t  iendvec = (dimensions_[last]-istartvec);\n\t  iendvec -= (iendvec % Packet<Type>::size);\n\t  iendvec += istartvec;\n\t}\n\n\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, ind);\n\t  // Innermost loop\n\t  for ( ; i[last] < istartvec; 
++i[last], ++index) {\n\t    // Scalar version\n\t    data_[index] = rhs.next_value_contiguous(ind);\n\t  }\n\t  Type* const __restrict t = data_; // Avoids an unnecessary load for some reason\n\t  for ( ; i[last] < iendvec; i[last] += Packet<Type>::size,\n\t\t  index += Packet<Type>::size) {\n\t    // Vectorized version\n\t    //\t    rhs.next_packet(ind).put(data_+index);\n\t    // FIX may need unaligned store\n\t    rhs.next_packet(ind).put(t+index);\n\t  }\n\t  for ( ; i[last] < dimensions_[last]; ++i[last], ++index) {\n\t    // Scalar version\n\t    data_[index] = rhs.next_value_contiguous(ind);\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n      else {\n\t// Non-contiguous source or destination data\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, ind);\n\t  // Innermost loop\n\t  for ( ; i[last] < dimensions_[last]; ++i[last],\n\t\t  index += offset_[last]) {\n\t    data_[index] = rhs.next_value(ind);\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n    }\n\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    inline\n    typename internal::enable_if<LocalIsActive && EIsActive,void>::type\n  //    assign_expression_(const E& rhs) {\n    assign_expression_(const E rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_expression_<LocalRank,false,false>(rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = LocalRank-1;\n\n      ADEPT_ACTIVE_STACK->check_space(internal::expr_cast<E>::n_active * size());\n\n      if (internal::expr_cast<E>::is_vectorizable && rhs.all_arrays_contiguous()) {\n\t// Contiguous source and destination data\n\tType* const __restrict t = data_; // Avoids an unnecessary 
load for some reason\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, ind);\n\t  // Innermost loop\n\t  for ( ; i[last] < dimensions_[last]; ++i[last],\n\t\t  index += offset_[last]) {\n\t    t[index] = rhs.next_value_and_gradient_contiguous(*ADEPT_ACTIVE_STACK, ind);\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active?\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n      else {\n\t// Non-contiguous source or destination data\n\tType* const __restrict t = data_; // Avoids an unnecessary load for some reason\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, ind);\n\t  // Innermost loop\n\t  for ( ; i[last] < dimensions_[last]; ++i[last],\n\t\t  index += offset_[last]) {\n\t    t[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, ind);\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active?\n\t  }\n\t  advance_index(index, my_rank, i);\n\t} while (my_rank >= 0);\n      }\n    }\n\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    inline\n    typename internal::enable_if<LocalIsActive && !EIsActive,void>::type\n    assign_expression_(const E& rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_expression_<LocalRank,false,false>(rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      Index gradient_ind = gradient_index();\n      static const int last = LocalRank-1;\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, ind);\n\t// Innermost loop\n\tADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimensions_[LocalRank-1],\n\t\t\t\t\t   offset_[LocalRank-1]);\n\tfor ( ; i[last] < dimensions_[last]; ++i[last],\n\t       index += offset_[last]) {\n\t  data_[index] = 
rhs.next_value(ind);\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n\n    template<bool LocalIsActive, class B, typename C>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) {\n      ExpressionSize<Rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = Rank-1;\n\n      do {\n\ti[last] = 0;\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimensions_[last]; ++i[last],\n\t       index += offset_[last]) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    data_[index] = rhs;\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    template<bool LocalIsActive, class B, typename C>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (! 
ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_conditional_inactive_scalar_<false, B, C>(bool_expr, rhs);\n\treturn;\n      }\n#endif\n\n      ExpressionSize<Rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = Rank-1;\n\n      do {\n\ti[last] = 0;\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimensions_[last]; ++i[last],\n\t       index += offset_[last]) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    data_[index] = rhs;\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index);\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    template<bool LocalIsActive, class B, class C>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_conditional_(const B& bool_expr, const C& rhs) {\n      ExpressionSize<Rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      ExpressionSize<internal::expr_cast<C>::n_arrays> rhs_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = Rank-1;\n      bool is_gap = false;\n\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, rhs_ind);\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimensions_[last]; ++i[last],\n\t       index += offset_[last]) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    if (is_gap) {\n\t      rhs.set_location(i, rhs_ind);\n\t      is_gap = false;\n\t    }\n\t    data_[index] = rhs.next_value(rhs_ind);\n\t  }\n\t  else {\n\t    is_gap = true;\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n    template<bool LocalIsActive, class B, class C>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_conditional_(const B& bool_expr, const C& rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if 
(!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_conditional_<false>(bool_expr, rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<Rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      ExpressionSize<internal::expr_cast<C>::n_arrays> rhs_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = Rank-1;\n      bool is_gap = false;\n\n      ADEPT_ACTIVE_STACK->check_space(internal::expr_cast<C>::n_active * size());\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, rhs_ind);\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimensions_[last]; ++i[last],\n\t       index += offset_[last]) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    if (is_gap) {\n\t      rhs.set_location(i, rhs_ind);\n\t      is_gap = false;\n\t    }\n\t    data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, rhs_ind);\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active?\n\t  }\n\t  else {\n\t    is_gap = true;\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n    // -------------------------------------------------------------------\n    // Array: 8. Static variables\n    // -------------------------------------------------------------------\n  public:\n\n\n    void print_style(ArrayPrintStyle ps);\n\n\n    // -------------------------------------------------------------------\n    // Array: 9. 
Data\n    // -------------------------------------------------------------------\n  protected:\n    Type* __restrict data_;           // Pointer to values\n    Storage<Type>* storage_;          // Pointer to Storage object\n    ExpressionSize<Rank> dimensions_; // Size of each dimension\n    ExpressionSize<Rank> offset_;     // Memory offset for each dimension\n\n  }; // End of Array class\n\n\n  // -------------------------------------------------------------------\n  // Helper functions\n  // -------------------------------------------------------------------\n\n  // Set the default ordering of arrays: if \"true\" use C-style\n  // row-major ordering, otherwise use Fortran-style column-major\n  // ordering\n  inline\n  void set_array_row_major_order(bool o = true) {\n    internal::array_row_major_order = o;\n  }\n\n  // Set the print style\n  void set_array_print_style(ArrayPrintStyle ps);\n\n  inline ArrayPrintStyle get_array_print_style() {\n    return internal::array_print_style;\n  }\n\n  // Change whether or not curly brackets are printed when arrays are\n  // sent to a stream with the << operator\n  inline\n  void set_array_print_curly_brackets(bool o = true) {\n    if (o) {\n      set_array_print_style(PRINT_STYLE_CURLY);\n    }\n    else {\n      set_array_print_style(PRINT_STYLE_PLAIN);\n    }\n  }\n\n  // Print array on a stream\n  template <int Rank, typename Type, bool IsActive>\n  inline\n  std::ostream&\n  operator<<(std::ostream& os, const Array<Rank,Type,IsActive>& A) {\n    return A.print(os);\n  }\n\n\n  // Extract inactive part of array, working correctly depending on\n  // whether argument is active or inactive\n  template <int Rank, typename Type>\n  inline\n  Array<Rank, Type, false>&\n  value(Array<Rank, Type, false>& expr) {\n    return expr;\n  }\n  template <int Rank, typename Type>\n  inline\n  Array<Rank, Type, false>\n  value(Array<Rank, Type, true>& expr) {\n    return expr.inactive_link();\n  }\n\n  // Print an array expression on a 
stream\n  template <typename Type, class E>\n  inline\n  typename internal::enable_if<(E::rank > 0), std::ostream&>::type\n  operator<<(std::ostream& os, const Expression<Type,E>& expr) {\n    Array<E::rank,Type,false> A;\n    A.assign_inactive(expr);\n    return A.print(os);\n  }\n\n  // -------------------------------------------------------------------\n  // Transpose function\n  // -------------------------------------------------------------------\n\n  // Transpose 2D array\n  template<typename Type, bool IsActive>\n  inline\n  Array<2,Type,IsActive>\n  transpose(Array<2,Type,IsActive>& in) {\n    // Create output array initially as link to input array \n    Array<2,Type,IsActive> out(in);\n    // Swap dimensions\n    return out.in_place_transpose();\n  }\n\n  // Transpose 1D array, treating it as a length N column vector, so\n  // returning a 1xN 2D array\n  template<typename Type, bool IsActive>\n  inline\n  Array<2,Type,IsActive>\n  transpose(Array<1,Type,IsActive>& in) {\n    return Array<2,Type,IsActive>(in.data(), in.storage(),\n\t\t\t\t  ExpressionSize<2>(1,in.dimension(0)),\n\t\t\t\t  ExpressionSize<2>(in.dimension(0)*in.offset(0),in.offset(0)));\n  }\n\n  // Transpose a 2D expression\n  template<typename Type, class E>\n  inline\n  typename internal::enable_if<E::rank == 2, Array<2,Type,E::is_active> >::type\n  transpose(const Expression<Type,E>& in) {\n    // Create output array by evaluating input expression\n    Array<2,Type,E::is_active> out(in);\n    // Swap dimensions\n    return out.in_place_transpose();\n  }\n\n  // Transpose a 1D expression\n  template<typename Type, class E>\n  inline\n  typename internal::enable_if<E::rank == 1, Array<2,Type,E::is_active> >::type\n  transpose(const Expression<Type,E>& in) {\n    Array<1,Type,E::is_active> out_1D(in);\n    return Array<2,Type,E::is_active>(out_1D.data(), out_1D.storage(),\n\t\t\t\t      ExpressionSize<2>(1,out_1D.dimension(0)),\n\t\t\t\t      
ExpressionSize<2>(out_1D.dimension(0)*out_1D.offset(0),out_1D.offset(0)));\n  }\n\n  // Extract the gradients from an active Array after the\n  // Stack::forward or Stack::reverse functions have been called\n  template<int Rank, typename Type, typename dType>\n  inline\n  void get_gradients(const Array<Rank,Type,true>& a, Array<Rank,dType,false>& data)\n  {\n    data = a.get_gradient();\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/ArrayWrapper.h",
    "content": "/* ArrayWrapper.h -- Make Arrays work faster in expressions\n\n    Copyright (C) 2016-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptArrayWrapper_H\n#define AdeptArrayWrapper_H 1\n\n//#include <adept/Array.h>\n\nnamespace adept {\n\n  // Forward declaration of Array class\n  template <int Rank, typename Type, bool IsActive> class Array;\n  \n  namespace internal {\n\n    template<int Rank, typename Type, bool IsActive>\n    struct ArrayWrapper : public Expression<Type,ArrayWrapper<Rank,Type,IsActive> > {\n\n      typedef Array<Rank,Type,IsActive> MyArray;\n\n      // Static definitions to enable the properties of this type of\n      // expression to be discerned at compile time\n      static const bool is_active  = IsActive;\n      static const bool is_lvalue  = true;\n      static const int  rank       = Rank;\n      static const int  n_active   = IsActive * (1 + is_complex<Type>::value);\n      static const int  n_scratch  = 0;\n      static const int  n_arrays   = 1;\n      static const bool is_vectorizable = MyArray::is_vectorizable;\n      \n      ArrayWrapper(const MyArray& a) : data(a.const_data()), array(a) { }\n      \n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn array.get_dimensions_(dim);\n      }\n      \n      std::string expression_string_() const {\n\treturn std::string(\"wrapped\") + array.expression_string_();\n      }\n      \n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn array.is_aliased(mem1, mem2);\n      }\n      \n      bool all_arrays_contiguous_() const { \n\treturn array.all_arrays_contiguous_();\n      }\n      \n      bool is_aligned_() const {\n\treturn array.is_aligned_();\n      }\n      \n      template <int n>\n      int alignment_offset_() const {\n\treturn array.template alignment_offset_<n>();\n      }\n      \n      Type 
value_with_len_(const Index& j, const Index& len) const {\n\treturn array.value_with_len_(j,len);\n      }\n      \n      // Optimize by storing the offset of the fastest-varying dimension?\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\tarray.template advance_location_<MyArrayNum>(loc);\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn data[loc[MyArrayNum]];\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn Packet<Type>(data+loc[MyArrayNum]);\n      }\n      \n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn data[loc[MyArrayNum]];\n      }\n      \n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn data[loc[MyArrayNum]];\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\tarray.template set_location_<MyArrayNum>(i, index);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tarray.template calc_gradient_<MyArrayNum,MyScratchNum>(stack, loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  
MyType multiplier) const {\n\tarray.template calc_gradient_<MyArrayNum,MyScratchNum>(stack, loc, scratch, multiplier);\n      }\n         \n      \n    protected:\n      //      typedef Type __attribute__((aligned(32))) aligned_type;\n      Type const * const __restrict data;\n      //aligned_type const * const __restrict data;\n      const MyArray& __restrict array;\n    };\n    \n    // Unary and binary operations normally contain constant\n    // references to their arguments, but if that reference is an\n    // Array then the compiler represents this reference as a pointer\n    // that must be dereferenced every time a value is extracted from\n    // the Array. To speed this up, nested_expression<ExprType>::type\n    // is used to obtain the constant reference to ExprType, but for\n    // passive Arrays an ArrayWrapper object is returned instead that\n    // is faster.\n    template <class T>\n    struct nested_expression {\n      typedef const T& __restrict type;\n    };\n\n    template <int Rank, typename Type, bool IsActive>\n    struct nested_expression<Array<Rank,Type,IsActive> > {\n      typedef const ArrayWrapper<Rank,Type,IsActive> type;\n    };\n\n    template <class Type, template<class> class Op, class R>\n    struct UnaryOperation;\n    template <class Type, class L, class Op, class R>\n    struct BinaryOperation;\n\n    // Should we check that rank is > 1?\n    template <class Type, template<class> class Op, class R>\n    struct nested_expression<UnaryOperation<Type,Op,R> > {\n      typedef UnaryOperation<Type,Op,R> type;\n    };\n    template <class Type, class L, class Op, class R>\n    struct nested_expression<BinaryOperation<Type,L,Op,R> > {\n      typedef BinaryOperation<Type,L,Op,R> type;\n    };\n    \n  }\n}\n\n\n#endif\n"
  },
  {
    "path": "include/adept/BinaryOperation.h",
    "content": "/* BinaryOperation.h -- Binary operations on Adept expressions\n\n    Copyright (C) 2014-2018 European Centre for Medium-Range Weather Forecasts\n\n    Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptBinaryOperation_H\n#define AdeptBinaryOperation_H\n\n#include <adept/Expression.h>\n\n#include <adept/ArrayWrapper.h>\n\nnamespace adept {\n  namespace internal {\n\n    // ---------------------------------------------------------------------\n    // SECTION 4.1: Binary operations: define BinaryOperation type\n    // ---------------------------------------------------------------------\n\n    // Binary operations derive from this class, where Op is a policy\n    // class defining how to implement the operation and L and R are\n    // the arguments to the operation\n    template <class Type, class L, class Op, class R>\n    struct BinaryOperation\n      : public Expression<Type, BinaryOperation<Type, L, Op, R> >,\n\tprotected Op {\n\n      // Static data\n      static const int  rank  = (L::rank > R::rank ? 
L::rank : R::rank);\n      static const bool is_active = (L::is_active || R::is_active) \n\t&& !is_same<Type, bool>::value;\n      static const int  store_result = is_active * Op::store_result;\n      static const int  n_active = expr_cast<L>::n_active + expr_cast<R>::n_active;\n      // Assume the only local scratch variable is the result of the\n      // binary expression\n      static const int  n_local_scratch = store_result; \n      //\t+ Op::n_scratch<L::is_active,R::is_active>::value\n      static const int  n_scratch \n        = n_local_scratch + L::n_scratch + R::n_scratch;\n      static const int  n_arrays  = L::n_arrays + R::n_arrays;\n      static const bool is_vectorizable\n\t= L::is_vectorizable && R::is_vectorizable && Op::is_vectorized\n\t&& is_same<typename L::type,typename R::type>::value;\n\n      using Op::is_operator;\n      using Op::operation;\n      using Op::operation_string;\n      \n      // DATA\n      //const L& left;\n      //const R& right;\n      const typename nested_expression<L>::type left;\n      const typename nested_expression<R>::type right;\n\n      BinaryOperation(const Expression<typename L::type, L>& left_,\n\t\t      const Expression<typename R::type, R>& right_)\n\t: left(left_.cast()), right(right_.cast()) { \n      }\n      \n      template <int Rank>\n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn my_get_dimensions<L::rank != 0, R::rank != 0>(dim);\n      }\n\n    protected:\n\n      template <bool LIsArray, bool RIsArray, int Rank>\n      typename enable_if<LIsArray && RIsArray, bool>::type\n      my_get_dimensions(ExpressionSize<Rank>& dim) const {\n\tExpressionSize<Rank> right_dim;\n\treturn left.get_dimensions(dim)\n\t  && right.get_dimensions(right_dim)\n\t  && compatible(dim, right_dim);\n      }\n\n      template <bool LIsArray, bool RIsArray, int Rank>\n      typename enable_if<LIsArray && !RIsArray, bool>::type\n      my_get_dimensions(ExpressionSize<Rank>& dim) const {\n\treturn 
left.get_dimensions(dim);\n      }\n\n      template <bool LIsArray, bool RIsArray, int Rank>\n      typename enable_if<!LIsArray && RIsArray, bool>::type\n      my_get_dimensions(ExpressionSize<Rank>& dim) const {\n\treturn right.get_dimensions(dim);\n      }\n\n      template <bool LIsArray, bool RIsArray, int Rank>\n      typename enable_if<!LIsArray && !RIsArray, bool>::type\n      my_get_dimensions(ExpressionSize<Rank>& dim) const {\n\treturn true;\n      }\n\n    public:\n\n      std::string expression_string_() const {\n\tstd::string str;\n\tif (is_operator) {\n\t  str = \"(\" + left.expression_string()\n\t    + operation_string()\n\t    + right.expression_string() + \")\";\n\t}\n\telse {\n\t  str = operation_string();\n\t  str += \"(\" + left.expression_string()\n\t    + \",\" + right.expression_string() + \")\";\n\t}\n\treturn str;\n      }\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn left.is_aliased(mem1, mem2) || right.is_aliased(mem1, mem2);\n      }\n      bool all_arrays_contiguous_() const { \n\treturn left.all_arrays_contiguous_()\n\t  &&  right.all_arrays_contiguous_();\n      }\n\n      bool is_aligned_() const {\n\treturn left.is_aligned_() && right.is_aligned_();\n      }\n      \n      template <int n>\n      int alignment_offset_() const {\n\tint l = left.template alignment_offset_<n>();\n\tint r = right.template alignment_offset_<n>();\n\tif (l == r) {\n\t  return l;\n\t}\n\telse if (l == n) {\n\t  return r;\n\t} else if (r == n) {\n\t  return l;\n\t}\n\telse {\n\t  return -1;\n\t}\n      }\n\n      Type value_with_len_(const Index& j, const Index& len) const {\n\treturn operation(left.value_with_len(j,len), \n\t\t\tright.value_with_len(j,len));\n      }\n\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\tleft.template advance_location_<MyArrayNum>(loc);\n\tright.template advance_location_<MyArrayNum+L::n_arrays>(loc);\n      }\n\n      
template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left.template value_at_location_<MyArrayNum>(loc),\n\t\t\t right.template value_at_location_<MyArrayNum+L::n_arrays>(loc));\n      }\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left.template packet_at_location_<MyArrayNum>(loc),\n\t\t\t right.template packet_at_location_<MyArrayNum+L::n_arrays>(loc));\n      }\n\n      template <bool IsAligned,\tint MyArrayNum, typename PacketType,\n\tint NArrays>\n      PacketType values_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left.template  values_at_location_<IsAligned,MyArrayNum,PacketType>(loc),\n\t\t\t right.template values_at_location_<IsAligned,MyArrayNum+L::n_arrays,PacketType>(loc));\n      }\n\n      template <bool UseStored, bool IsAligned,\tint MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      PacketType values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t   ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn my_values_at_location_store_<store_result,UseStored,IsAligned,\n\t\t\t\t\t    MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n      // Adept-1.x did not store for addition and subtraction!\n      // Moreover, we should ideally not ask inactive arguments to\n      // store their result.\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn my_value_at_location_store_<store_result,MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n      // Adept-1.x did not store for addition and subtraction!\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const 
ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn my_value_stored_<store_result,MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n    protected:\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult==1, Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] \n\t  = operation(left.template value_at_location_store_<MyArrayNum,MyScratchNum+n_local_scratch>(loc, scratch),\n\t\t      right.template value_at_location_store_<MyArrayNum+L::n_arrays,\n\t\t\t\t\t\t     MyScratchNum+L::n_scratch+n_local_scratch>(loc, scratch));\n      }\n\n      // In differentiating \"a/b\", it helps to store \"1/b\";\n      // \"operation_store\" is only provided by Divide and Atan2\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult==2, Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] \n\t  = Op::operation_store(left.template value_at_location_store_<MyArrayNum,MyScratchNum+n_local_scratch>(loc, scratch),\n\t\t\t    right.template value_at_location_store_<MyArrayNum+L::n_arrays,\n\t\t\t    MyScratchNum+L::n_scratch+n_local_scratch>(loc, scratch),\n\t\t\t    scratch[MyScratchNum+1]);\n      }\n\n      // Adept-1.x did not store for addition and subtraction!\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      typename enable_if<(StoreResult > 0), Type>::type\n      my_value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, 
\n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult==0, Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn operation(left.template value_at_location_store_<MyArrayNum,MyScratchNum+n_local_scratch>(loc, scratch),\n\t\t\t right.template value_at_location_store_<MyArrayNum+L::n_arrays,\n\t\t\t MyScratchNum+L::n_scratch+n_local_scratch>(loc, scratch));\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      typename enable_if<StoreResult==0, Type>::type\n      my_value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn operation(left.template value_at_location_<MyArrayNum>(loc),\n\t\t\t right.template value_at_location_<MyArrayNum+L::n_arrays>(loc));\n      }\n    \n      template <int StoreResult, bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<StoreResult==1 && !UseStored, PacketType>::type\n      my_values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t   ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn scratch[MyScratchNum]\n\t  = operation(left.template values_at_location_store_<UseStored,IsAligned,MyArrayNum,\n\t\t                                     MyScratchNum+n_local_scratch>(loc, scratch),\n\t\t      right.template values_at_location_store_<UseStored,IsAligned,MyArrayNum+L::n_arrays,\n\t\t                                     MyScratchNum+L::n_scratch+n_local_scratch>(loc, scratch));\n      }\n\n      template <int StoreResult, bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<StoreResult==2 && !UseStored, PacketType>::type\n      my_values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t   
ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn scratch[MyScratchNum]\n\t  = Op::operation_store(left.template values_at_location_store_<UseStored,IsAligned,MyArrayNum,\n\t\t                                     MyScratchNum+n_local_scratch>(loc, scratch),\n\t\t\t\tright.template values_at_location_store_<UseStored,IsAligned,MyArrayNum+L::n_arrays,\n\t\t\t\t                     MyScratchNum+L::n_scratch+n_local_scratch>(loc, scratch),\n\t\t\t\tscratch[MyScratchNum+1]);\n      }\n\n      template <int StoreResult, bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<(StoreResult>0) && UseStored, PacketType>::type\n      my_values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t   ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int StoreResult, bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<StoreResult==0 && !UseStored, PacketType>::type\n      my_values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t   ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn operation(left.template values_at_location_store_<UseStored,IsAligned,MyArrayNum,\n\t\t                                     MyScratchNum+n_local_scratch>(loc, scratch),\n\t\t\t right.template values_at_location_store_<UseStored,IsAligned,MyArrayNum+L::n_arrays,\n\t\t                                     MyScratchNum+L::n_scratch+n_local_scratch>(loc, scratch));\n      }\n\n      template <int StoreResult, bool UseStored, bool IsAligned, int MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<StoreResult==0 && UseStored, PacketType>::type\n      my_values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t   
ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn operation(left.template values_at_location_<IsAligned,MyArrayNum,PacketType>(loc),\n\t\t\t right.template values_at_location_<IsAligned,MyArrayNum+L::n_arrays,PacketType>(loc));\n      }\n\n    public:\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\tleft.template set_location_<MyArrayNum>(i, index);\n\tright.template set_location_<MyArrayNum+L::n_arrays>(i, index);\n      }\n\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n        calc_left_ <MyArrayNum, MyScratchNum>(stack, left,  loc, scratch);\n        calc_right_<MyArrayNum, MyScratchNum>(stack, right, loc, scratch);\n      }\n      // As the previous but multiplying the gradient by \"multiplier\"\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n        calc_left_ <MyArrayNum, MyScratchNum>(stack, left,  loc, scratch, multiplier);\n        calc_right_<MyArrayNum, MyScratchNum>(stack, right, loc, scratch, multiplier);\n      }\n    \n    protected:\n      // Only calculate gradients for left and right arguments if they\n      // are active; otherwise do nothing\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType>\n      typename enable_if<LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tOp::template calc_left<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch);\n      }\n\n      template <int 
MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType>\n      typename enable_if<!LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType>\n      typename enable_if<RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tOp::template calc_right<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType>\n      typename enable_if<!RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType, typename MyType>\n      typename enable_if<LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tOp::template calc_left<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch, multiplier);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType, typename MyType>\n      typename enable_if<!LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const { }\n\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType, typename MyType>\n      typename enable_if<RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const 
ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tOp::template calc_right<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch, multiplier);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType, typename MyType>\n      typename enable_if<!RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const { }\n    };\n  \n\n    // ---------------------------------------------------------------------\n    // SECTION 4.2: policy classes for BinaryOperation: with scalars\n    // ---------------------------------------------------------------------\n\n    // Binary operations with a non-Expression on the left-hand-side\n    template <class Type, typename L, class Op, class R>\n    struct BinaryOpScalarLeft\n      : public Expression<Type, BinaryOpScalarLeft<Type, L, Op, R> >,\n\tprotected Op {\n\n      // Static data\n      static const int rank  = R::rank;\n      static const bool is_active = R::is_active && !is_same<Type, bool>::value;\n      static const int  store_result = is_active * Op::store_result;\n      static const int n_active = expr_cast<R>::n_active;\n      // Assume the only local scratch variable is the result of the\n      // binary expression\n      static const int  n_local_scratch = store_result; \n      //\t+ Op::n_scratch<L::is_active,R::is_active>::value\n      static const int  n_scratch\n        = n_local_scratch + R::n_scratch;\n      static const int  n_arrays  = R::n_arrays;\n      static const bool is_vectorizable = R::is_vectorizable && Op::is_vectorized\n\t&& is_same<L,typename R::type>::value;\n\n      using Op::is_operator;\n      using Op::operation;\n      using Op::operation_string;\n      \n      // DATA\n      Packet<L> left;\n      const R& right;\n\n      BinaryOpScalarLeft(L left_,  const Expression<typename R::type, R>& right_)\n\t: 
left(left_), right(right_.cast()) { \n      }\n      \n      template <int Rank>\n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn right.get_dimensions(dim);\n      }\n\n      std::string expression_string_() const {\n\tstd::stringstream s;\n\tif (is_operator) {\n\t  s << \"(\" << left.value() << operation_string()\n\t    << right.expression_string() << \")\";\n\t}\n\telse {\n\t  s << operation_string() << \"(\" << left.value() << \",\"\n\t    << static_cast<const R*>(&right)->expression_string() << \")\";\n\t}\n\treturn s.str();\n      }\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn right.is_aliased(mem1, mem2);\n      }\n      bool all_arrays_contiguous_() const {\n\treturn right.all_arrays_contiguous_(); \n      }\n\n       bool is_aligned_() const {\n\treturn right.is_aligned_();\n      }    \n\n      template <int n>\n      int alignment_offset_() const { return right.template alignment_offset_<n>(); }\n\n      Type value_with_len_(const Index& j, const Index& len) const {\n\treturn operation(left.value(), right.value_with_len(j,len));\n      }\n\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\tright.template advance_location_<MyArrayNum>(loc);\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left.value(), right.template value_at_location_<MyArrayNum>(loc));\n      }\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left, \n\t\t\t right.template packet_at_location_<MyArrayNum>(loc));\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn 
my_value_at_location_store_<store_result,MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn my_value_stored_<store_result,MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n    protected:\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult == 1, Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] = operation(left.value(),\n\t\t      right.template value_at_location_store_<MyArrayNum, MyScratchNum+n_local_scratch>(loc, scratch));\n      }\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult == 2, Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] = Op::operation_store(left.value(),\n\t       right.template value_at_location_store_<MyArrayNum, MyScratchNum+n_local_scratch>(loc, scratch),\n\t       scratch[MyScratchNum+1]);\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      typename enable_if<(StoreResult > 0), Type>::type\n      my_value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult == 0, Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn 
operation(left.value(),\n\t     right.template value_at_location_store_<MyArrayNum,MyScratchNum+n_local_scratch>(loc, scratch));\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      typename enable_if<StoreResult == 0, Type>::type\n      my_value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn operation(left.value(),right.template value_at_location_<MyArrayNum>(loc));\n      }\n    \n\n    public:\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\tright.template set_location_<MyArrayNum>(i, index);\n      }\n\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n        calc_right_<MyArrayNum, MyScratchNum>(stack, right, loc, scratch);\n      }\n      // As the previous but multiplying the gradient by \"multiplier\"\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n        calc_right_<MyArrayNum, MyScratchNum>(stack, right, loc, scratch, multiplier);\n      }\n    \n    protected:\n      // Only calculate gradients arguments if they are active;\n      // otherwise do nothing\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType>\n      typename enable_if<RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tOp::template calc_right<MyArrayNum, MyScratchNum>(stack, Scalar<L>(left.value()), right, loc, 
scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType>\n      typename enable_if<!RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType, typename MyType>\n      typename enable_if<RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tOp::template calc_right<MyArrayNum, MyScratchNum>(stack, Scalar<L>(left.value()), right, loc, scratch, multiplier);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class RType, typename MyType>\n      typename enable_if<!RType::is_active,void>::type\n      calc_right_(Stack& stack, const RType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const { }\n    };\n\n\n\n\n    // Binary operations with a non-Expression on the right-hand-side\n    template <class Type, typename L, class Op, class R>\n    struct BinaryOpScalarRight\n      : public Expression<Type, BinaryOpScalarRight<Type, L, Op, R> >,\n\tprotected Op {\n\n      // Static data\n      static const int rank  = L::rank;\n      static const bool is_active = L::is_active && !is_same<Type,bool>::value;\n      static const int  store_result = is_active * Op::store_result;\n      static const int n_active  = expr_cast<L>::n_active;\n      // Assume the only local scratch variable is the result of the\n      // binary expression\n      static const int  n_local_scratch = store_result; \n      //\t+ Op::n_scratch<L::is_active,R::is_active>::value\n      static const int  n_scratch\n        = n_local_scratch + L::n_scratch;\n      static const int  n_arrays  = L::n_arrays;\n    
  static const bool is_vectorizable = L::is_vectorizable && Op::is_vectorized\n\t&& is_same<typename L::type,R>::value;\n\n      using Op::is_operator;\n      using Op::operation;\n      using Op::operation_string;\n      \n      // DATA\n      const L& left;\n      Packet<R> right;\n\n      BinaryOpScalarRight(const Expression<typename L::type, L>& left_, R right_)\n\t: left(left_.cast()), right(right_) {\n\t// Some operations (divide and atan2) store one extra piece of\n\t// information during differentiation, so have\n\t// store_result==2.  This should not be needed when the RHS is\n\t// scalar, so has not been implemented.\n\tADEPT_STATIC_ASSERT((!is_active || store_result<2), ERROR_IN_BINARY_OP_SCALAR_RIGHT);\n      }\n      \n      template <int Rank>\n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn left.get_dimensions(dim);\n      }\n\n      std::string expression_string_() const {\n\tstd::stringstream s;\n\tif (is_operator) {\n\t  s << \"(\" << left.expression_string() << operation_string()\n\t    << right.value() << \")\";\n\t}\n\telse {\n\t  s << operation_string() << \"(\"\n\t    << static_cast<const L*>(&left)->expression_string() << \",\"\n\t    << right.value() << \")\";\n\t}\n\treturn s.str();\n      }\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn left.is_aliased(mem1, mem2);\n      }\n      bool all_arrays_contiguous_() const {\n\treturn left.all_arrays_contiguous_(); \n      }\n\n      bool is_aligned_() const {\n\treturn left.is_aligned_();\n      }\n\n      template <int n>\n      int alignment_offset_() const { return left.template alignment_offset_<n>(); }\n\n      Type value_with_len_(const Index& j, const Index& len) const {\n\treturn operation(left.value_with_len(j,len), right.value());\n      }\n\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\tleft.template advance_location_<MyArrayNum>(loc);\n      }\n\n      
template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left.template value_at_location_<MyArrayNum>(loc), right.value());\n      }\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(left.template packet_at_location_<MyArrayNum>(loc),\n\t\t\t right);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn my_value_at_location_store_<store_result,MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn my_value_stored_<store_result,MyArrayNum,MyScratchNum>(loc, scratch);\n      }\n\n    protected:\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<(StoreResult > 0), Type>::type\n      my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] = operation(\n\t left.template value_at_location_store_<MyArrayNum, MyScratchNum+n_local_scratch>(loc, scratch), right.value());\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      typename enable_if<(StoreResult > 0), Type>::type\n      my_value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch>\n      typename enable_if<StoreResult == 0, Type>::type\n      
my_value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t       ScratchVector<NScratch>& scratch) const {\n\treturn operation(left.template value_at_location_store_<MyArrayNum,MyScratchNum+n_local_scratch>(loc, scratch), \n\t\t\t right.value());\n      }\n\n      template <int StoreResult, int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      typename enable_if<StoreResult == 0, Type>::type\n      my_value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const ScratchVector<NScratch>& scratch) const {\n\treturn operation(left.template value_at_location_<MyArrayNum>(loc), right.value());\n      }\n    \n\n    public:\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\tleft.template set_location_<MyArrayNum>(i, index);\n      }\n\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n        calc_left_<MyArrayNum, MyScratchNum>(stack, left, loc, scratch);\n      }\n      // As the previous but multiplying the gradient by \"multiplier\"\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n        calc_left_<MyArrayNum, MyScratchNum>(stack, left, loc, scratch, multiplier);\n      }\n    \n    protected:\n      // Only calculate gradients arguments if they are active;\n      // otherwise do nothing\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType>\n      typename enable_if<LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const 
ScratchVector<NScratch>& scratch) const {\n\tOp::template calc_left<MyArrayNum, MyScratchNum>(stack, left, Scalar<R>(right.value()), loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType>\n      typename enable_if<!LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType, typename MyType>\n      typename enable_if<LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tOp::template calc_left<MyArrayNum, MyScratchNum>(stack, left, Scalar<R>(right.value()), loc, scratch, multiplier);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class LType, typename MyType>\n      typename enable_if<!LType::is_active,void>::type\n      calc_left_(Stack& stack, const LType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const { }\n\t};\n \n  } // End namespace internal\n\n\n\n\n  namespace internal {\n\n    // ---------------------------------------------------------------------\n    // SECTION 4.3: policy classes for BinaryOperation: standard operators\n    // ---------------------------------------------------------------------\n\n    // Policy class implementing operator+\n    struct Add {\n      static const bool is_operator  = true;  // Operator or function for expression_string()\n      static const int  store_result = 0;     // Do we need any scratch space?\n      static const bool is_vectorized = true;\n\n      const char* operation_string() const { return \"+\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, 
class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const { return left + right; }\n      \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch);\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch);\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier);\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, 
MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, multiplier);\n      }\n    };\n\n    // Policy class implementing operator-\n    struct Subtract {\n      static const bool is_operator  = true;  // Operator or function for expression_string()\n      static const int  store_result = 1;     // Do we need any scratch space?\n      static const bool is_vectorized = true;\n\n      const char* operation_string() const { return \"-\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const { return left - right; }\n      \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch);\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, -1.0);\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n        left.template 
calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier);\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, -multiplier);\n      }\n    };\n\n\n    // Policy class implementing operator*\n    struct Multiply {\n      static const bool is_operator  = true; // Operator or function for expression_string()\n      static const int  store_result = 1;    // Do we need any scratch space? (this can be 0 or 1)\n      static const bool is_vectorized = true;\n\n      const char* operation_string() const { return \"*\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const { return left * right; }\n      \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      static void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t    right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch));\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      static void 
calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t\t\t\t   left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch));\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      static void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier\n\t    *right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch));\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      static void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t\t   multiplier*left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch));\n      }\n    };\n\n    // Policy class implementing operator/\n    struct Divide {\n      static const bool is_operator  = true; // Operator or function for expression_string()\n      static const int  store_result = 2;    // Do we need any scratch space? 
(this can be 1 or 2)\n      static const bool is_vectorized = true;\n\n      const char* operation_string() const { return \"/\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const { return left / right; }\n\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation_store(const LType& left, const RType& right, Real& one_over_right) const { \n\tone_over_right = 1.0 / right;\n\treturn left * one_over_right; \n      }\n      \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\t// If f(a,b) = a/b then df/da = 1/b\n\t// If store_result==1 then do this:\n        //left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t//    1.0 / right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch));\n\t// If store_result==2 then do this:\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t\t\t\t\t\t\t\t\t    scratch[MyScratchNum+1]);\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\t// If f(a,b) = a/b then df/db = -a/(b*b) = -f/b\n\t// If store_result==1 then do this:\n        //right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, 
scratch, \n\t//      -scratch[MyScratchNum] / right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch));\n\t// If store_result==2 then do this:\n\tright.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t\t\t\t\t\t\t\t      -scratch[MyScratchNum] * scratch[MyScratchNum+1]);\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\t// If f(a,b) = a/b then w*df/da = w/b\n\t// If store_result==1 then do this:\n        //left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier\n\t//    / right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch));\n\t// If store_result==2 then do this:\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t\t\t\t\t\t\t\t\t    multiplier*scratch[MyScratchNum+1]);\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\t// If f(a,b) = a/b then w*df/db = -w*a/(b*b) = -w*f/b\n\t// If store_result==1 then do this:\n        //right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t//\t\t  -multiplier * scratch[MyScratchNum] \n\t//\t      / right.template 
value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch));\n\t// If store_result==2 then do this:\n\tright.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t\t\t\t\t\t      -multiplier * scratch[MyScratchNum] * scratch[MyScratchNum+1]);\n      }\n    };\n\n    // Policy class implementing function pow\n    struct Pow {\n      static const bool is_operator  = false; // Operator or function for expression_string()\n      static const int  store_result = 1;     // Do we need any scratch space? (this CANNOT be changed)\n      static const bool is_vectorized = false;\n\n      const char* operation_string() const { return \"pow\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const {\n\tusing std::pow;\n\treturn pow(left, right);\n      }\n      \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tusing std::pow;\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t   right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch)\n\t    *pow(left.template value_stored_<MyArrayNum, MyScratchNum+store_result>(loc, scratch),\n\t\t right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch) - 1.0));\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& 
left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tusing std::log;\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t  scratch[MyScratchNum] * log(left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch)));\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tusing std::pow;\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier\n\t    *right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch)\n\t    *pow(left.template value_stored_<MyArrayNum, MyScratchNum+store_result>(loc, scratch),\n\t\t right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch) - 1.0));\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tusing std::log;\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t\t   multiplier * scratch[MyScratchNum] \n\t\t  * log(left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch)));\n      }\n    };\n\n\n    // Policy class implementing function atan2\n    struct Atan2 {\n      static 
const bool is_operator  = false; // Operator or function for expression_string()\n      static const int  store_result = 2;     // Do we need any scratch space? Yes: for left^2+right^2\n      static const bool is_vectorized = false;\n\n      const char* operation_string() const { return \"atan2\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const {\n\tusing std::atan2;\n\treturn atan2(left, right);\n      }\n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation_store(const LType& left, const RType& right, Real& saved_term) const {\n\tusing std::atan2;\n\tsaved_term = 1.0 / (left*left + right*right);\n\treturn atan2(left, right);\n      }\n            \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t   right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch)\n\t    *scratch[MyScratchNum+1]);\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t  -left.template 
value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch)*scratch[MyScratchNum+1]);\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n        left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, \n\t   right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch)\n\t    *scratch[MyScratchNum+1]*multiplier);\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n        right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, \n\t  -left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch)*scratch[MyScratchNum+1]*multiplier);\n      }\n    };\n\n\n    // Policy class implementing function max\n    struct Max {\n      static const bool is_operator  = false; // Operator or function for expression_string()\n      static const int  store_result = 0;    // Do we need any scratch space? 
(this can be 0 or 1)\n      static const bool is_vectorized = true;\n\n      const char* operation_string() const { return \"max\"; } // For expression_string()\n      \n      // Implement the basic operation - first the version for packets\n      template <class LType, class RType>\n      typename enable_if<is_packet<LType>::value,LType>::type\n      operation(const LType& left, const RType& right) const\n      { return adept::internal::fmax(left,right); }\n\n#ifndef ADEPT_CXX11_FEATURES\n      // For C++98, use simple ternary operation\n      template <class LType, class RType>\n      typename enable_if<!is_packet<LType>::value,typename promote<LType, RType>::type>::type\n      operation(const LType& left, const RType& right) const { return left < right ? right : left; }\n#else\n      // For C++11 use the (hopefully faster) fmax function for floating-point functions\n      template <class LType, class RType>\n      typename enable_if<!is_packet<LType>::value &&\n                         (!is_floating_point<LType>::value\n\t\t\t  || !is_floating_point<RType>::value),\n\t\t\t typename promote<LType, RType>::type>::type\n      operation(const LType& left, const RType& right) const { return left < right ? 
right : left; }\n\n      template <class LType, class RType>\n      typename enable_if<!is_packet<LType>::value &&\n                         (is_floating_point<LType>::value\n\t\t\t  && is_floating_point<RType>::value),\n\t\t\t typename promote<LType, RType>::type>::type\n      operation(const LType& left, const RType& right) const { return std::fmax(left,right); }\n#endif\n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tif (is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch);\n\t}\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tif (!is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch);\n\t}\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tif (is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier);\n\t}\n      }\n\n      // Calculate the gradient 
of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tif (!is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, multiplier);\n\t}\n      }\n\n    private:\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      bool is_left(const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t   const ScratchVector<NScratch>& scratch) const {\n\treturn left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch)\n\t  > right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch);\n      }\n    };\n\n\n    // Policy class implementing function min\n    struct Min {\n      static const bool is_operator  = false; // Operator or function for expression_string()\n      static const int  store_result = 0;    // Do we need any scratch space? 
(this can be 0 or 1)\n      static const bool is_vectorized = true;\n\n      const char* operation_string() const { return \"min\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename enable_if<is_packet<LType>::value,LType>::type\n      operation(const LType& left, const RType& right) const\n      { return adept::internal::fmin(left,right); }\n#ifndef ADEPT_CXX11_FEATURES\n      // For C++98, use simple ternary operation\n      template <class LType, class RType>\n      typename enable_if<!is_packet<LType>::value,typename promote<LType, RType>::type>::type\n      operation(const LType& left, const RType& right) const { return left < right ? left : right; }\n#else\n      // For C++11 use the (hopefully faster) fmin function for floating-point functions\n      template <class LType, class RType>\n      typename enable_if<!is_packet<LType>::value &&\n                         (!is_floating_point<LType>::value\n\t\t\t  || !is_floating_point<RType>::value),\n\t\t\t typename promote<LType, RType>::type>::type\n      operation(const LType& left, const RType& right) const { return left < right ? 
left : right; }\n\n      template <class LType, class RType>\n      typename enable_if<!is_packet<LType>::value &&\n                         (is_floating_point<LType>::value\n\t\t\t  && is_floating_point<RType>::value),\n\t\t\t typename promote<LType, RType>::type>::type\n      operation(const LType& left, const RType& right) const { return std::fmin(left,right); }\n#endif\n  \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tif (is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch);\n\t}\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tif (!is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch);\n\t}\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tif (is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier);\n\t}\n      }\n\n      // Calculate the 
gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tif (!is_left<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  right.template calc_gradient_<MyArrayNum+L::n_arrays, MyScratchNum+L::n_scratch+store_result>(stack, loc, scratch, multiplier);\n\t}\n      }\n\n    private:\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      bool is_left(const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t   const ScratchVector<NScratch>& scratch) const {\n\treturn left.template value_stored_<MyArrayNum,MyScratchNum+store_result>(loc, scratch)\n\t  <= right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch);\n      }\n    };\n\n    // Policy class implementing copysign\n    struct CopySign {\n      static const bool is_operator  = false;  // Operator or function for expression_string()\n      static const int  store_result = 0;     // Do we need any scratch space?\n      static const bool is_vectorized = false;\n\n      const char* operation_string() const { return \"copysign\"; } // For expression_string()\n      \n      // Implement the basic operation\n      template <class LType, class RType>\n      typename promote<LType, RType>::type\n      operation(const LType& left, const RType& right) const {\n\t// Not very efficient but no guarantee that copysign function\n\t// is available, and also would need to check for\n\t// compatibility of left and right types.\n\tif (right >= 0) {\n\t  return left;\n\t}\n\telse {\n\t  return -left;\n\t}\n      }\n      \n      // Calculate the gradient of the left-hand argument\n      template <int MyArrayNum, int 
MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\tif (is_right_positive<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch);\n\t}\n\telse {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, -1.0);\n\t}\n      }\n\n      // Calculate the gradient of the right-hand argument\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch) const {\n\t// Do nothing: gradient of RHS is zero\n      }\n\n      // Calculate the gradient of the left-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tif (is_right_positive<MyArrayNum,MyScratchNum>(left,right,loc,scratch)) {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, multiplier);\n\t}\n\telse {\n\t  left.template calc_gradient_<MyArrayNum, MyScratchNum+store_result>(stack, loc, scratch, -multiplier);\n\t}\n      }\n\n      // Calculate the gradient of the right-hand argument with a multiplier\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R, typename MyType>\n      void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t       const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\t// Do nothing: 
gradient of RHS is zero\n      }\n    private:\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class L, class R>\n      bool is_right_positive(const L& left, const R& right, const ExpressionSize<NArrays>& loc,\n\t\t\t     const ScratchVector<NScratch>& scratch) const {\n\treturn right.template value_stored_<MyArrayNum+L::n_arrays,MyScratchNum+L::n_scratch+store_result>(loc, scratch)\n\t  >= 0.0;\n      }\n\n    };\n\n    \n\n  } // End namespace internal\n\n\n#define ADEPT_DEFINE_OPERATION(NAME, OPERATOR)\t\t\t\t\\\n  template<class L, class R>\t\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::rank_compatible<L::rank, R::rank>::value, \\\n\t\t\t       internal::BinaryOperation<typename internal::promote<typename L::type, \\\n\t\t\t\t\t\t\t\t\t\t    typename R::type>::type, \\\n\t\t\t\t\t\t\t L, internal:: NAME, R> >::type\t\\\n  OPERATOR(const Expression<typename L::type, L>& l,\t\t\t\\\n\t   const Expression<typename R::type, R>& r)\t{\t\t\\\n    using namespace adept::internal;\t\t\t\t\t\\\n    return BinaryOperation<typename promote<typename L::type,\t\t\\\n\t\t\t\t\t    typename R::type>::type,\t\\\n\t\t\t   L, NAME, R>(l.cast(), r.cast());\t\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n  template<typename LType, class R>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::is_not_expression<LType>::value, \\\n\t\t\t       internal::BinaryOpScalarLeft<typename internal::promote<LType, \\\n\t\t\t\t\t\t\t\t\t\t       typename R::type>::type, \\\n\t\t\t\t\t\t\t    LType, internal:: NAME, R> >::type \\\n  OPERATOR(const LType& l, const Expression<typename R::type, R>& r)\t{ \\\n    using namespace adept::internal;\t\t\t\t\t\\\n    return BinaryOpScalarLeft<typename promote<LType, typename R::type>::type, \\\n      LType, NAME, R>(l, r.cast());\t\t\t\t\t\\\n  }\n\n#define ADEPT_DEFINE_SCALAR_RHS_OPERATION(NAME, OPERATOR)\t\t\\\n  template<class L, typename 
RType>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::is_not_expression<RType>::value, \\\n\t\t\t       internal::BinaryOpScalarRight<typename internal::promote<typename L::type, \\\n\t\t\t\t\t\t\t\t\t\t\tRType>::type, \\\n\t\t\t\t\t\t\t     L, internal:: NAME, RType> >::type \\\n  OPERATOR(const Expression<typename L::type, L>& l, const RType& r) {\t\\\n    using namespace adept::internal;\t\t\t\t\t\\\n    return BinaryOpScalarRight<typename promote<typename L::type, RType>::type, \\\n      L, NAME, RType>(l.cast(), r);\t\t\\\n  }\n\n  // The following define Expr*Expr and Scalar*Expr\n  ADEPT_DEFINE_OPERATION(Add, operator+)\n  ADEPT_DEFINE_OPERATION(Subtract, operator-)\n  ADEPT_DEFINE_OPERATION(Multiply, operator*)\n  ADEPT_DEFINE_OPERATION(Divide, operator/)\n  ADEPT_DEFINE_OPERATION(Pow, pow)\n  ADEPT_DEFINE_OPERATION(Atan2, atan2)\n  ADEPT_DEFINE_OPERATION(Max, max)\n  ADEPT_DEFINE_OPERATION(Min, min)\n  // If std::max has been brought into scope via a \"using\" directive\n  // then calling \"max\" with two arguments of the same type will call\n  // the std::max rather than adept::max function, even if these\n  // arguments are from the adept namespace. This will cause a compile\n  // failure. Likewise with std::min. To avoid this, either don't use\n  // \"using std::max\", or alternatively use Adept's \"fmax\" and \"fmin\"\n  // functions, which do the same thing but match the C++11 functions\n  // std::fmax and std::fmin for floating-point types.  Note that you\n  // can use these Adept functions even if you are not using C++11.\n  ADEPT_DEFINE_OPERATION(Max, fmax)\n  ADEPT_DEFINE_OPERATION(Min, fmin)\n  ADEPT_DEFINE_OPERATION(CopySign, copysign)\n\n  // The following define Expr*Scalar; those in the list above but not\n  // below (e.g. 
Divide) use a custom implementation of Expr*Scalar\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Add, operator+)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Subtract, operator-)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Multiply, operator*)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Pow, pow)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Max, max)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Min, min)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Max, fmax)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(Min, fmin)\n  ADEPT_DEFINE_SCALAR_RHS_OPERATION(CopySign, copysign)\n\n#undef ADEPT_DEFINE_OPERATION\n#undef ADEPT_DEFINE_SCALAR_RHS_OPERATION\n\n  // Treat expression divided by floating-point scalar differently\n  // since this can be changed to a more efficient multiplication\n  template<class L, typename RType>\n  inline\n  typename internal::enable_if<internal::is_not_expression<RType>::value \n                               && (internal::is_floating_point<RType>::value || L::is_active),\n\t\t\t       internal::BinaryOpScalarRight<typename internal::promote<typename L::type,\n\t\t\t\t\t\t\t\t\t\t\tRType>::type,\n\t\t\t\t\t\t\t     L, internal::Multiply, \n\t\t\t\t\t\t\t     typename internal::promote<typename L::type,\n\t\t\t\t\t\t\t\t\t\t\tRType>::type> >::type\n  operator/(const Expression<typename L::type, L>& l, const RType& r) {\n    using namespace adept::internal;\n    typedef typename promote<typename L::type, RType>::type PType;\n    return BinaryOpScalarRight<PType, L, Multiply, PType>(l.cast(), 1.0/static_cast<PType>(r));\n  }\n\n  // Treat expression divided by any other type of scalar as division,\n  // but differentiation is not properly implemented for dividing by a\n  // scalar, so if the left hand side is active then the version above\n  // (converting to a multiplication) will be used\n  template<class L, typename RType>\n  inline\n  typename internal::enable_if<internal::is_not_expression<RType>::value\n                               && (!internal::is_floating_point<RType>::value && 
!L::is_active),\n\t\t\t       internal::BinaryOpScalarRight<typename internal::promote<typename L::type,\n\t\t\t\t\t\t\t\t\t\t\tRType>::type,\n\t\t\t\t\t\t\t     L, internal::Divide, \n\t\t\t\t\t\t\t     typename internal::promote<typename L::type,\n\t\t\t\t\t\t\t\t\t\t\tRType>::type> >::type\n  operator/(const Expression<typename L::type, L>& l, const RType& r) {\n    using namespace adept::internal;\n    typedef typename promote<typename L::type, RType>::type PType;\n    return BinaryOpScalarRight<PType, L, Divide, PType>(l.cast(), static_cast<PType>(r));\n  }\n\n// Now the operators returning boolean results\n\n#define ADEPT_DEFINE_OPERATOR(NAME, OPERATOR, OPSYMBOL, OPSTRING)\t\\\n  namespace internal {\t\t\t\t\t\t\t\\\n    struct NAME {\t\t\t\t\t\t\t\\\n      static const bool is_operator  = true;\t\t\t\t\\\n      static const int  store_result = 0;\t                        \\\n      static const bool is_vectorized = false;\t\t\t\t\\\n      const char* operation_string() const { return OPSTRING; }\t\t\\\n      \t\t\t\t\t\t\t\t\t\\\n      template <class LType, class RType>\t\t\t\t\\\n      bool operation(const LType& left, const RType& right) const\t\\\n      { return left OPSYMBOL right; }\t\t\t\t\t\\\n    };\t\t\t\t\t\t\t\t\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n  template<class L, class R>\t\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::rank_compatible<L::rank, R::rank>::value \\\n\t\t\t       && (L::rank > 0 || R::rank > 0) ,\t\\\n\t    internal::BinaryOperation<bool,L,internal:: NAME, R> >::type \\\n  OPERATOR(const Expression<typename L::type, L>& l,\t\t\t\\\n\t   const Expression<typename R::type, R>& r)\t{\t\t\\\n    using namespace adept::internal;\t\t\t\t\t\\\n    return BinaryOperation<bool, L, NAME, R>(l.cast(), r.cast());\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n  \t\t\t\t\t\t\t\t\t\\\n  template<typename LType, class R>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename 
internal::enable_if<internal::is_not_expression<LType>::value \\\n\t\t\t       && (R::rank > 0) ,\t\t\t\\\n\t\t\t       internal::BinaryOpScalarLeft<bool,LType,internal:: NAME, R> >::type \\\n  OPERATOR(const LType& l, const Expression<typename R::type, R>& r) {\t\\\n    using namespace adept::internal;\t\t\t\t\t\\\n    return BinaryOpScalarLeft<bool, LType, NAME, R>(l, r.cast());\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n  \t\t\t\t\t\t\t\t\t\\\n  template<class L, typename RType>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::is_not_expression<RType>::value \\\n\t\t       && (L::rank > 0),\t\t\t\\\n       internal::BinaryOpScalarRight<bool, L, internal:: NAME, RType> >::type \\\n  OPERATOR(const Expression<typename L::type, L>& l, const RType& r) {\t\\\n    using namespace adept::internal;\t\t\t\t\t\\\n    return BinaryOpScalarRight<bool, L, NAME, RType>(l.cast(), r);\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\t\t\\\n  template<class L, class R>\t\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<L::rank == 0 && R::rank == 0,\t\t\\\n\t\t\t       bool>::type\t\t\t\t\\\n  OPERATOR(const Expression<typename L::type, L>& l,\t\t\t\\\n\t   const Expression<typename R::type, R>& r) {\t\t\t\\\n    return l.scalar_value() OPSYMBOL r.scalar_value();\t\t\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n  \t\t\t\t\t\t\t\t\t\\\n  template<typename LType, class R>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::is_not_expression<LType>::value \\\n\t\t\t       && R::rank == 0, bool>::type\t\t\\\n  OPERATOR(const LType& l, const Expression<typename R::type, R>& r) {\t\\\n    return l OPSYMBOL r.scalar_value();\t\t\t\t\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n  \t\t\t\t\t\t\t\t\t\\\n  template<class L, typename RType>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  typename internal::enable_if<internal::is_not_expression<RType>::value \\\n\t\t\t       && L::rank == 0, bool>::type\t\t\\\n  OPERATOR(const Expression<typename L::type, L>& 
l, const RType& r) {\t\\\n    return l.scalar_value() OPSYMBOL r;\t\t\t\t\t\\\n  }\n\n\n// These return bool expressions when applied to expressions of rank\n// greater than zero\nADEPT_DEFINE_OPERATOR(GreaterThan, operator>, >, \" > \")\nADEPT_DEFINE_OPERATOR(LessThan, operator<, <, \" < \")\nADEPT_DEFINE_OPERATOR(GreaterThanEqualTo, operator>=, >=, \" >= \")\nADEPT_DEFINE_OPERATOR(LessThanEqualTo, operator<=, <=, \" <= \")\nADEPT_DEFINE_OPERATOR(EqualTo, operator==, ==, \" == \")\nADEPT_DEFINE_OPERATOR(NotEqualTo, operator!=, !=, \" != \")\n\n// These should only work on bool expressions\nADEPT_DEFINE_OPERATOR(Or, operator||, ||, \" || \")\nADEPT_DEFINE_OPERATOR(And, operator&&, &&, \" && \")\n\n#undef ADEPT_DEFINE_OPERATOR\n\n  template <typename Type, class R>\n  inline\n  typename internal::enable_if<R::rank == 0,Type>::type\n  value(const Expression<Type, R>& r) {\n    return r.scalar_value();\n  }\n\n} // End namespace adept\n\n\n#endif\n"
  },
  {
    "path": "include/adept/Expression.h",
    "content": "/* Expression.h -- Base class for arrays and active objects\n\n    Copyright (C) 2014-2017 European Centre for Medium-Range Weather Forecasts\n\n    Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptExpression_H\n#define AdeptExpression_H\n\n#include <sstream>\n#include <cmath>\n\n#include <adept/ExpressionSize.h>\n#include <adept/traits.h>\n#include <adept/exception.h>\n#include <adept/ScratchVector.h>\n#include <adept/Packet.h>\n\nnamespace adept {\n\n  using internal::Packet;\n\n  // ---------------------------------------------------------------------\n  // SECTION 0: Forward declarations \n  // ---------------------------------------------------------------------\n  \n  class Stack;\n\n  // ---------------------------------------------------------------------\n  // SECTION 1: Definition of Expression type\n  // ---------------------------------------------------------------------\n\n  // All types of expression derive from Expression.  
\"A\" is the\n  // actual type of the expression (a use of the Curiously Recurring\n  // Template Pattern).\n  template <typename Type, class A>\n  struct Expression {\n\n    // Static information about the expression\n  public:\n    typedef Type type;\n    typedef Type value_type; // STL-style\n\n    // There are several \"static const\" members in the derived\n    // classes, some of which require fall-back values, defined here:\n\n    // By default an expression is not vectorizable.\n    static const bool is_vectorizable = false;\n\n    // Classes derived from this one that do not define how many\n    // scratch variables, active variables or arrays they contain are\n    // assumed to need zero\n    static const int  n_scratch = 0;\n\n    // Number of active variables in the expression (where each array\n    // counts as 1), used to work out how much space must be reserved\n    // on the operation stack\n    static const int  n_active = 0;\n\n    // Is this an active expression?\n    static const bool is_active = false;\n\n    // Expressions cannot be lvalues by default\n    static const bool is_lvalue = false;\n\n    // The presence of _adept_expression_flag is used to define the\n    // adept::is_not_expression trait\n    typedef bool _adept_expression_flag;\n\n    // Cast the expression to its true type, given by the template\n    // argument\n    const A& cast() const { return static_cast<const A&>(*this); }\n    \n    // Return the dimensions of the expression\n    template <int Rank>\n    bool get_dimensions(ExpressionSize<Rank>& dim) const {\n      return cast().get_dimensions_(dim);\n    }\n\n    // Return a string representation of the expression\n    std::string expression_string() const {\n      return cast().expression_string_();\n    }\n    \n    Type value_with_len(Index j, Index len) const {\n      ADEPT_STATIC_ASSERT(A::rank<=1,\n\t\t  VALUE_WITH_LEN_ONLY_APPLICABLE_TO_ARRAYS_OF_RANK_0_OR_1);\n      return cast().value_with_len_(j, len);\n    
}\n\n    // These functions are for rank-0 expressions where there is no\n    // indexing required\n    Type scalar_value() const { \n      ExpressionSize<0> dummy_index;\n      return cast().template value_at_location_<0>(dummy_index);\n    }\n\n    // Return true if any memory in the expression lies between mem1\n    // and mem2: used to test for aliasing when doing assignment.\n    bool is_aliased(const Type* mem1, const Type* mem2) const {\n      return cast().is_aliased_(mem1, mem2);\n    }\n\n    // Return true if the fastest varying dimension of all the arrays\n    // in the expression are contiguous and increasing.  If so, we can\n    // more simply increment their indices.\n    bool all_arrays_contiguous() const {\n      return cast().all_arrays_contiguous_();\n    }\n\n    // By default, arrays are contiguous (this fall-back used for\n    // objects that aren't arrays)\n    bool all_arrays_contiguous_() const { return true; }\n\n    // Are all the arrays in the expression aligned to a Packet<Type>\n    // boundary?\n    bool is_aligned() const {\n      return cast().is_aligned();\n    }\n\n    // In order to perform optimal vectorization, the first memory\n    // addresses of each inner dimension must be aligned\n    // appropriately, or they should all have the same offset so that\n    // this number of scalar operations can be performed at the start\n    // before begining on vector instructions.  
This function returns\n    // the offset of the data in any arrays in the expression, or -1 if\n    // there is a clash in offsets.\n    int alignment_offset() const {\n      int val = cast().template alignment_offset_<Packet<Type>::size>();\n      if (val < Packet<Type>::size) {\n\treturn val;\n      }\n      else {\n\t// Note that if an object returns val==Packet<Type>::size then\n\t// it indicates that alignment does not matter for this object\n\treturn 0;\n      }\n    }\n    \n    // Fall-back position is that alignment doesn't matter for this\n    // object, which is encoded by returning n\n    template <int n>\n    int alignment_offset_() const { return n; }\n\n    // If the sub-expression is of a different type from that\n    // requested then we assume there must be no aliasing.\n    template <typename MyType>\n    typename internal::enable_if<!internal::is_same<MyType,Type>::value, bool>::type\n    is_aliased(const MyType* mem1, const MyType* mem2) const {\n      return false;\n    }\n  \n    Type \n    scalar_value_and_gradient(Stack& stack) const {\n      internal::ScratchVector<A::n_scratch> scratch;\n      ExpressionSize<0> dummy_index;\n      Type val = cast().template value_at_location_store_<0,0>(dummy_index, scratch);\n      cast().template calc_gradient_<0,0>(stack, dummy_index, scratch);\n      return val;\n    }\n \n    // For each array in the expression use location \"i\" to return the\n    // memory index\n    template <int Rank, int NArrays>\n    void\n    set_location(const ExpressionSize<Rank>& i, \n\t\t ExpressionSize<NArrays>& index) const {\n      cast().template set_location_<0>(i, index);\n    }\n\n    // Get the value at the specified location and move to the next\n    // location\n    template <int NArrays>\n    Type next_value(ExpressionSize<NArrays>& index) const {\n      Type val = cast().template value_at_location_<0>(index);\n      cast().template advance_location_<0>(index);\n      return val;\n    }\n    // If all arrays are 
have an inner dimension that is contiguous\n    // and increasing then their indices may be incremented all\n    // together, which is more efficient\n    template <int NArrays>\n    Type next_value_contiguous(ExpressionSize<NArrays>& index) const {\n      Type val = cast().template value_at_location_<0>(index);\n      ++index;\n      return val;\n    }\n\n    template <int NArrays>\n    Packet<Type> next_packet(ExpressionSize<NArrays>& index) const {\n      Packet<Type> val\n      \t= cast().template packet_at_location_<0>(index);\n      index += Packet<Type>::size;\n      return val;\n    }\n\n    template <int NArrays>\n    Type value_at_location(ExpressionSize<NArrays>& index) const {\n      return cast().template value_at_location_<0>(index);\n    }\n    template <int NArrays>\n    void advance_location(ExpressionSize<NArrays>& index) const {\n      cast().template advance_location_<0>(index);\n    }\n\n    // Get the value at the specified location, calculate the gradient\n    // and move to the next location\n    template <int NArrays>\n    Type next_value_and_gradient(Stack& stack,\n\t\t\t\t ExpressionSize<NArrays>& index) const {\n      internal::ScratchVector<A::n_scratch> scratch;\n      Type val = cast().template value_at_location_store_<0,0>(index, scratch);\n      cast().template calc_gradient_<0,0>(stack, index, scratch);\n      cast().template advance_location_<0>(index);\n      //++index;\n      return val;\n    }\n    template <int NArrays>\n    Type next_value_and_gradient_contiguous(Stack& stack,\n\t\t\t\t ExpressionSize<NArrays>& index) const {\n      internal::ScratchVector<A::n_scratch> scratch;\n      Type val = cast().template value_at_location_store_<0,0>(index, scratch);\n      cast().template calc_gradient_<0,0>(stack, index, scratch);\n      //cast().template advance_location_<0>(index);\n      ++index;\n      return val;\n    }\n\n    // This is used in product()\n    template <int NArrays, typename MyType>\n    Type 
next_value_and_gradient_special(Stack& stack,\n\t\t\t\t ExpressionSize<NArrays>& index,\n\t\t\t\t const MyType& multiplier) const {\n      internal::ScratchVector<A::n_scratch> scratch;\n      Type val = cast().template value_at_location_store_<0,0>(index, scratch);\n      cast().template calc_gradient_<0,0>(stack, index, scratch, multiplier);\n      cast().template advance_location_<0>(index);\n      return val;\n    }\n\n    // This is used in norm2()\n    template <int NArrays, typename MyType>\n    Type next_value_and_gradient_special2(Stack& stack,\n\t\t\t\t ExpressionSize<NArrays>& index,\n\t\t\t\t const MyType& multiplier) const {\n      internal::ScratchVector<A::n_scratch> scratch;\n      Type val = cast().template value_at_location_store_<0,0>(index, scratch);\n      cast().template calc_gradient_<0,0>(stack, index, scratch, multiplier*val);\n      cast().template advance_location_<0>(index);\n      return val;\n    }\n\n    // Inaccessible methods\n    //  private:\n    //    Expression(const Expression&) { }\n\n  }; // End struct Expression\n\n\n  // ---------------------------------------------------------------------\n  // SECTION 2: Definition of Scalar type\n  // ---------------------------------------------------------------------\n\n  // Specific types of operation are in the adept::internal namespace\n  namespace internal {\n\n    // SCALAR\n\n    template <typename Type>\n    struct Scalar : public Expression<Type, Scalar<Type> > {\n      static const int  rank       = 0;\n      static const int  n_scratch  = 0;\n      static const int  n_active   = 0;\n      static const int  n_arrays   = 0;\n      static const bool is_active  = false;\n      static const bool is_vectorizable = true;\n\n      Scalar(const Type& value) : val_(value) { }\n\n      bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }\n\n      std::string expression_string_() const {\n\tstd::stringstream s;\n\ts << val_;\n\treturn s.str();\n      }\n\n      bool 
is_aliased_(const Type* mem1, const Type* mem2) const { return false; }\n\n      Type value_with_len_(const Index& j, const Index& len) const\n      { return val_; }\n\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const { } \n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const\n      { return val_; }\n\n      template <int MyArrayNum, int NArrays>\n      Packet<Type>\n      packet_at_location_(const ExpressionSize<NArrays>& loc) const\n      { return Packet<Type>(val_); }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const\n      { return val_; }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const\n      { return val_; }\n\n      template <bool IsAligned,\tint MyArrayNum, typename PacketType,\n\tint NArrays>\n      PacketType values_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn PacketType(val_);\n      }\n\n      template <bool UseStored, bool IsAligned,\tint MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      PacketType values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t   ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn PacketType(val_);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {}\n\n      template <int MyArrayNum, int MyScratchNum, \n\t\tint NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& 
loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  const MyType& multiplier) const {}\n\n      template <bool IsAligned, int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t\tint NArrays, int NScratch, int NActive>\n      void calc_gradient_packet_(Stack& stack, \n\t\t\t\t const ExpressionSize<NArrays>& loc,\n\t\t\t\t const ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t\t ScratchVector<NActive,Packet<Real> >& gradients) const {}\n\n      template <bool IsAligned, int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t\tint NArrays, int NScratch, int NActive, typename MyType>\n      void calc_gradient_packet_(Stack& stack, \n\t\t\t\t const ExpressionSize<NArrays>& loc,\n\t\t\t\t const ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t\t ScratchVector<NActive,Packet<Real> >& gradients,\n\t\t\t\t const MyType& multiplier) const {}\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {}\n\n    protected:\n      Type val_;\n      \n    };\n\n\n\n    // ---------------------------------------------------------------------\n    // SECTION 3. \"expr_cast\" helper \n    // ---------------------------------------------------------------------\n\n    // The following enables one of the static consts only in a\n    // derived class of Expression to be extracted, and is useful when\n    // you don't know whether a template argument to a function is an\n    // Expression or a class derived from it.  
Thus\n    // expr_cast<Expression<double,Array> >::is_vectorizable and\n    // expr_cast<Array>::is_vectorizable would both return\n    // Array::is_vectorizable.\n\n    template <class E>\n    struct expr_cast {\n      // Rank of the array\n      static const int  rank = E::rank;\n      // Number of scratch floating-point variables needed in the\n      // expression, for example to store the result of a calculation\n      // when it is needed again to compult the equivalent differential\n      // statement\n      static const int  n_scratch = E::n_scratch;\n      // Number of arrays within the expression; more specifically,\n      // the number of indices required to store the location of an\n      // element of the array\n      static const int  n_arrays = E::n_arrays;\n      // Number of active terms in the expression\n      static const int  n_active = E::n_active;\n      // Is this an array expression?\n      static const bool is_array = (E::rank > 0);\n      // Is this an array expression with dimension of 2 or more?\n      static const bool is_multidimensional = (E::rank > 1);\n      // Is this an active expression?\n      static const bool is_active = E::is_active;\n      // Is this expression actually an lvalue such as Array or\n      // FixedArray?\n      static const bool is_lvalue = E::is_lvalue;\n      // Is this expression vectorizable (conditional on a few extra\n      // run-time checks)?\n      static const bool is_vectorizable = E::is_vectorizable;  \n    };\n\n    template <typename T, class E>\n    struct expr_cast<Expression<T,E> > {\n      static const int  rank = E::rank;\n      static const int  n_scratch = E::n_scratch;\n      static const int  n_arrays = E::n_arrays;\n      static const int  n_active = E::n_active;\n      static const bool is_array = (E::rank > 0);\n      static const bool is_multidimensional = (E::rank > 1);\n      static const bool is_active = E::is_active;\n      static const bool is_lvalue = E::is_lvalue;\n      static 
const bool is_vectorizable = E::is_vectorizable;\n    };\n\n  }\n}\n\n#endif // AdeptExpression_H\n"
  },
  {
    "path": "include/adept/ExpressionSize.h",
    "content": "/* ExpressionSize.h -- Class for describing array sizes\n\n    Copyright (C) 2014-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The ExpressionSize class is used to pass information between\n   components of an expression on the dimensions (e.g. rows/columns,\n   but works in any number of dimensions) of that part of an\n   expression, and to check that the dimensions match.  Since\n   ExpressionSize objects can be used to index arrays, they may be\n   useful to users and so are not placed in the \"adept::internal\"\n   namespace.\n\n*/\n\n#ifndef AdeptExpressionSize_H\n#define AdeptExpressionSize_H\n\n#include <string>\n#include <sstream>\n\n#include <adept/base.h>\n#include <adept/traits.h>\n\nnamespace adept {\n\n  // Definition of ExpressionSize class\n  template <int Rank>\n  class ExpressionSize {\n  public:\n    // Constructors\n    ExpressionSize() { } // By default no initialization is done\n\n    ExpressionSize(Index j) {\n      if (j >= 0) {\n\t// Set all dimensions to the same value - usually 0 (empty\n\t// array) or 1 (scalar)\n\tset_all(j);\n      }\n      else {\n\t// Set just the first dimension to j; usually this would be\n\t// less than 0 to indicate an invalid expression\n\tdim[0] = j;\n      }\n    }\n\n    ExpressionSize(Index j0, Index j1)\n    { dim[0]=j0; dim[1]=j1; }\n    ExpressionSize(Index j0, Index j1, Index j2)\n    { dim[0]=j0; dim[1]=j1; dim[2]=j2; }\n    ExpressionSize(Index j0, Index j1, Index j2, Index j3)\n    { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; }\n    ExpressionSize(Index j0, Index j1, Index j2, Index j3, Index j4)\n    { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; dim[4]=j4; }\n    ExpressionSize(Index j0, Index j1, Index j2, Index j3, Index j4, Index j5)\n    { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; dim[4]=j4; dim[5]=j5; }\n    ExpressionSize(Index j0, Index j1, Index j2, Index j3, 
Index j4, Index j5, Index j6)\n    { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; dim[4]=j4; dim[5]=j5; dim[6]=j6; }\n\n    // Assume copy constructor will copy elements of dim\n    \n    // An \"invalid\" expression is one involving a mismatch of array\n    // sizes, and is conveyed by a negative first element\n    bool invalid_expression() const { return (dim[0] < 0); }\n\n    // Set all to specified value\n    void set_all(Index j) {\n      for (int i = 0; i < Rank; ++i) {\n\tdim[i] = j;\n      }\n    }\n\n    // Copy from an ExpressionSize object of the same rank\n    void copy(const ExpressionSize& d) {\n      for (int i = 0; i < Rank; ++i) {\n\tdim[i] = d[i];\n      }\n    }\n    // ...or pointer to raw data\n    void copy(const Index* d) {\n      for (int i = 0; i < Rank; ++i) {\n\tdim[i] = d[i];\n      }\n    }\n\n    // Copy dissimilar ExpressionSize object, filling the remaining\n    // dimensions with 1\n    template <int MyRank>\n    void copy_dissimilar(const ExpressionSize<MyRank>& d) {\n      int rank = MyRank > Rank ? 
Rank : MyRank;\n      for (int i = 0; i < rank; ++i) {\n\tdim[i] = d[i];\n      }\n      for (int i = rank; i < Rank; ++i) {\n\tdim[i] = 1;\n      }\n    }\n\n    // String representation\n    std::string str() const {\n      std::stringstream s;\n      s << \"[\" << dim[0];\n      for (int i = 1; i < Rank; ++i) {\n\ts << \",\" << dim[i];\n      }\n      s << \"]\";\n      return s.str();\n    }\n\n    // Get the total number of elements\n    Index size() const {\n      Index prod;\n      if (Rank == 0) {\n\tprod = 1;\n      }\n      else {\n\tprod = dim[0];\n\tfor (int i = 1; i < Rank; ++i) {\n\t  prod *= dim[i];\n\t}\n      }\n      return prod;\n    }\n\n    ExpressionSize& operator++() {\n      for (int i = 0; i < Rank; ++i) {\n\t++dim[i];\n      }\n      return *this;\n    }\n    ExpressionSize& operator+=(Index inc) {\n      for (int i = 0; i < Rank; ++i) {\n\tdim[i] += inc;\n      }\n      return *this;\n    }\n\n\n    bool operator==(const ExpressionSize<Rank>& rhs) const {\n      for (int i = 0; i < Rank; i++) {\n\tif (dim[i] != rhs[i]) {\n\t  return false;\n\t}\n      }\n      return true;\n    }\n    bool operator!=(const ExpressionSize<Rank>& rhs) const {\n      return !(*this == rhs);\n    }\n\n#ifdef ADEPT_MOVE_SEMANTICS\n    friend void swap(ExpressionSize<Rank>& l, \n\t\t     ExpressionSize<Rank>& r) noexcept {\n      for (int i = 0; i < Rank; ++i) {\n\tIndex tmp = l.dim[i];\n\tl.dim[i] = r.dim[i];\n\tr.dim[i] = tmp;\n      }\n    }\n#endif\n\n    // Const and non-const access to elements\n    Index& operator[](int i) { return dim[i]; }\n    const Index& operator[](int i) const { return dim[i]; }\n  private:\n    Index dim[Rank];\n  };\n\n  // Specialization for scalars (zero-rank arrays) known at compile\n  // time\n  template <>\n  class ExpressionSize<0> {\n  public:\n    ExpressionSize() { }\n    ExpressionSize(Index j) { }\n    bool invalid_expression() const { return false; }\n    std::string str() const { return \"\"; }\n    void 
set_all(Index) const { }\n    bool operator==(const ExpressionSize<0>&) const { return true; }\n    bool operator!=(const ExpressionSize<0>&) const { return false; }\n    bool operator[](int) const { return 0; }\n    template <int MyRank>\n    void copy_dissimilar(const ExpressionSize<MyRank>&) { }\n  };\n\n  // Send the size of an expression to a stream\n  template <int Rank>\n  inline\n  std::ostream& operator<<(std::ostream& os, const ExpressionSize<Rank>& s) {\n    if (Rank > 0) {\n      os << \"(\" << s[0];\n      for (int i = 1; i < Rank; i++) {\n\tos << \",\" << s[i];\n      }\n      return os << \")\";\n    }\n  }\n \n\n  namespace internal {\n    // The following are only used within the Adept library\n\n    // Check whether the size of one expression is compatible with\n    // that of another for arithmetic operations: this is \"true\" if\n    // the rank is the same and the dimensions match, or if one of the\n    // expressions is a scalar (zero rank).  If the ranks don't match\n    // and neither is zero then the program won't compile.\n    template <int LRank, int RRank>\n    inline\n    typename enable_if<LRank==RRank && (LRank>1), bool>::type\n    compatible(const ExpressionSize<LRank>& l, const ExpressionSize<RRank>& r) {\n      bool result = (l[0] == r[0]);\n      for (int i = 1; i < RRank; ++i) {\n\tresult = result && (l[i] == r[i]);\n      }\n      return result;\n    }\n\n    template <int LRank, int RRank>\n    inline\n    typename enable_if<LRank==1 && RRank==1, bool>::type\n    compatible(const ExpressionSize<LRank>& l, const ExpressionSize<RRank>& r) {\n      return l[0] == r[0];\n    }\n\n    template <int LRank, int RRank>\n    inline\n    typename enable_if<LRank==0 || RRank==0, bool>::type\n    compatible(const ExpressionSize<LRank>& l, const ExpressionSize<RRank>& r) {\n      return true;\n    }\n\n    // Return an ExpressionSize object of specified rank that expresses\n    // an invalid expression\n    template <int Rank>\n    inline\n 
   ExpressionSize<Rank> invalid_expression_size() {\n      return ExpressionSize<Rank>(-1);\n    }\n\n  } // End namespace internal\n\n  // Deprecated\n  inline ExpressionSize<1> expression_size(Index j0)\n  { return ExpressionSize<1>(j0); }\n  inline ExpressionSize<2> expression_size(Index j0, Index j1)\n  { return ExpressionSize<2>(j0, j1); }\n\n  // Use this instead\n  inline ExpressionSize<1> dimensions(Index j0)\n  { return ExpressionSize<1>(j0); }\n  inline ExpressionSize<2> dimensions(Index j0, Index j1)\n  { return ExpressionSize<2>(j0, j1); }\n  inline ExpressionSize<3> dimensions(Index j0, Index j1, Index j2)\n  { return ExpressionSize<3>(j0, j1, j2); }\n  inline ExpressionSize<4> dimensions(Index j0, Index j1, Index j2,\n\t\t\t\t      Index j3)\n  { return ExpressionSize<4>(j0, j1, j2, j3); }\n  inline ExpressionSize<5> dimensions(Index j0, Index j1, Index j2,\n\t\t\t\t      Index j3, Index j4)\n  { return ExpressionSize<5>(j0, j1, j2, j3, j4); }\n  inline ExpressionSize<6> dimensions(Index j0, Index j1, Index j2,\n\t\t\t\t      Index j3, Index j4, Index j5)\n  { return ExpressionSize<6>(j0, j1, j2, j3, j4, j5); }\n  inline ExpressionSize<7> dimensions(Index j0, Index j1, Index j2,\n\t\t\t\t      Index j3, Index j4, Index j5, Index j6)\n  { return ExpressionSize<7>(j0, j1, j2, j3, j4, j5, j6); }\n\n\n} // End namespace adept\n\n#endif // AdeptExpressionSize_H\n"
  },
  {
    "path": "include/adept/FixedArray.h",
    "content": "/* FixedArray.h -- active or inactive FixedArray of arbitrary rank\n\n    Copyright (C) 2014-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The FixedArray class has functionality modelled on Fortran-90 arrays -\n   they can have a rank up to 7 (above will work, but some forms of\n   indexing these arrays will not work).\n\n*/\n\n#ifndef AdeptFixedArray_H\n#define AdeptFixedArray_H 1\n\n#include <iostream>\n#include <sstream>\n#include <limits>\n#include <complex>\n\n#include <adept/Array.h>\n#include <adept/Allocator.h>\n\nnamespace adept {\n\n  namespace internal {\n\n    // -------------------------------------------------------------------\n    // Helper classes\n    // -------------------------------------------------------------------\n\n    // The following are used by expression_string()\n    template <int Rank, bool IsActive>\n    struct fixed_array_helper            { const char* name() { return \"FixedArray\";  } };\n    template <int Rank>\n    struct fixed_array_helper<Rank,true> { const char* name() { return \"aFixedArray\";  } };\n\n    template <>\n    struct fixed_array_helper<1,false>   { const char* name() { return \"FixedVector\"; } };\n    template <>\n    struct fixed_array_helper<1,true>    { const char* name() { return \"aFixedVector\"; } };\n\n    template <>\n    struct fixed_array_helper<2,false>   { const char* name() { return \"FixedMatrix\"; } };\n    template <>\n    struct fixed_array_helper<2,true>    { const char* name() { return \"aFixedMatrix\"; } };\n\n    template<Index J0, Index J1, Index J2, Index J3,\n\t     Index J4, Index J5, Index J6>\n    struct fixed_array {\n      static const int rank = (J0>0)\n\t* (1 + (J1>0) * (1 + (J2>0) * (1 + (J3>0) * (1 + (J4>0) * (1 + (J5>0) * (1 + (J6>0)))))));\n      static const Index length = (J0 + (J0<1)) * (J1 + (J1<1)) * (J2 + (J2<1))\n\t* (J3 + (J3<1)) * (J4 + 
(J4<1)) * (J5 + (J5<1)) * (J6 + (J6<1));\n    };\n\n  } // End namespace internal\n\n\n  // -------------------------------------------------------------------\n  // Definition of FixedArray class\n  // -------------------------------------------------------------------\n  template<typename Type, bool IsActive, Index J0, Index J1 = 0, \n\t   Index J2 = 0, Index J3 = 0, Index J4 = 0, Index J5 = 0, Index J6 = 0>\n  class FixedArray\n    : public Expression<Type,FixedArray<Type,IsActive,J0,J1,J2,J3,J4,J5,J6> >,\n      protected internal::GradientIndex<IsActive> {\n\n  public:\n    // -------------------------------------------------------------------\n    // FixedArray: 1. Static Definitions\n    // -------------------------------------------------------------------\n\n    // The Expression base class needs access to some protected member\n    // functions in section 5\n    friend struct Expression<Type,FixedArray<Type,IsActive,J0,J1,J2,J3,J4,J5,J6> >;\n\n    // Static definitions to enable the properties of this type of\n    // expression to be discerned at compile time\n    static const bool is_active  = IsActive;\n    static const bool is_lvalue  = true;\n    static const int  rank       = internal::fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank;\n    static const int  length_    = internal::fixed_array<J0,J1,J2,J3,J4,J5,J6>::length;\n    static const int  n_active   = IsActive * (1 + internal::is_complex<Type>::value);\n    static const int  n_scratch  = 0;\n    static const int  n_arrays   = 1;\n    static const bool is_vectorizable = Packet<Type>::is_vectorized;\n\n  protected:\n    template <int Dim, Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias { };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias<0,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X0; };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, 
Index X5, Index X6>\n    struct dimension_alias<1,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X1; };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias<2,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X2; };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias<3,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X3; };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias<4,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X4; };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias<5,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X5; };\n    template <Index X0, Index X1, Index X2,\n\t      Index X3, Index X4, Index X5, Index X6>\n    struct dimension_alias<6,X0,X1,X2,X3,X4,X5,X6>\n    { static const Index value = X6; };\n\n  public:\n    template <int Dim> struct dimension_ { static const int value \n      = dimension_alias<Dim,J0,J1,J2,J3,J4,J5,J6>::value; };\n\n    template <int RankMinusDim, int Dim>\n    struct offset_helper { \n      static const Index value = // Dim == Rank-1 ? 
1 :\n\tdimension_<Dim+1>::value*offset_helper<RankMinusDim-1, Dim+1>::value; \n    };\n    template <int Dim>\n    struct offset_helper<1,Dim> { static const Index value = 1; };\n    template <int Dim>\n    struct offset_helper<0,Dim> { static const Index value = 1; };\n    template <int Dim>\n    struct offset_helper<-1,Dim> { static const Index value = 1; };\n    template <int Dim>\n    struct offset_helper<-2,Dim> { static const Index value = 1; };\n    template <int Dim>\n    struct offset_helper<-3,Dim> { static const Index value = 1; };\n    template <int Dim>\n    struct offset_helper<-4,Dim> { static const Index value = 1; };\n    template <int Dim>\n    struct offset_helper<-5,Dim> { static const Index value = 1; };\n\n    template <int Dim> struct offset_ { static const Index value\n      = offset_helper<rank-Dim, Dim>::value; };\n\n\n    // -------------------------------------------------------------------\n    // FixedArray: 2. Constructors\n    // -------------------------------------------------------------------\n    \n    // Initialize an empty array\n    FixedArray() : internal::GradientIndex<IsActive>(length_, false) {\n      ADEPT_STATIC_ASSERT(!(std::numeric_limits<Type>::is_integer\n\t\t\t    && IsActive), CANNOT_CREATE_ACTIVE_FIXED_ARRAY_OF_INTEGERS);\n#ifdef ADEPT_REAL_INIT\n      initialize<Type>();\n#endif \n    }\n\n#ifdef ADEPT_REAL_INIT\n  private:\n\n    // Initialize to zero, NaN or whatever for debugging\n    template <typename T>\n    typename internal::enable_if<internal::is_floating_point<T>::value, void>::type\n    initialize() {\n      for (int i = 0; i < length_; ++i) {\n\tdata_[i] = ADEPT_INIT_REAL;\n      }\n    }\n    template <typename T>\n    typename internal::enable_if<internal::is_complex<T>::value, void>::type\n    initialize() {\n      for (int i = 0; i < length_; ++i) {\n#ifdef ADEPT_INIT_REAL_SNAN\n        data_[i] = std::complex<typename Type::value_type>(\n          std::numeric_limits<typename 
Type::value_type>::signaling_NaN(),\n\t  std::numeric_limits<typename Type::value_type>::signaling_NaN());\n#else\n\tdata_[i] = std::complex<typename Type::value_type>(ADEPT_INIT_REAL, ADEPT_INIT_REAL);\n#endif\n      }\n    }\n\n    // Dummy initialize for non-floats\n    template <typename T>\n    typename internal::enable_if<!internal::is_floating_point<T>::value\n\t\t\t\t && !internal::is_complex<T>::value, void>::type\n    initialize() { }\n\n  public:\n#endif\n\n    // Copy constructor copies the data, unlike in the Array class\n    FixedArray(const FixedArray& rhs) \n      : internal::GradientIndex<IsActive>(length_, false)\n    { *this = rhs; }\n\n  public:\n    // Initialize with an expression on the right hand side by\n    // evaluating the expression, requiring the ranks to be equal.\n    // Note that this constructor enables expressions to be used as\n    // arguments to functions that expect an array - to prevent this\n    // implicit conversion, use the \"explicit\" keyword.\n    template<typename EType, class E>\n    FixedArray(const Expression<EType, E>& rhs,\n\t  typename internal::enable_if<E::rank == rank,int>::type = 0)\n      : internal::GradientIndex<IsActive>(length_, false)\n    { *this = rhs; }\n\n#ifdef ADEPT_CXX11_FEATURES\n    // Initialize from initializer list\n    template <typename T>\n    FixedArray(std::initializer_list<T> list) \n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n\n    // The unfortunate restrictions on initializer_list constructors\n    // mean that each possible Array rank needs explicit treatment\n    template <typename T>\n    FixedArray(std::initializer_list<\n\t  std::initializer_list<T> > list)\n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n\n    template <typename T>\n    FixedArray(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > list)\n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n\n   
 template <typename T>\n    FixedArray(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > list)\n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n\n    template <typename T>\n    FixedArray(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > > list)\n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n\n    template <typename T>\n    FixedArray(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > > > list)\n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n\n    template <typename T>\n    FixedArray(std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<\n\t  std::initializer_list<T> > > > > > > list)\n      : internal::GradientIndex<IsActive>(length_,false) { *this = list; }\n    \n#endif\n\n    // Destructor: if the data are stored in a Storage object then we\n    // tell it that one fewer object is linking to it; if the number\n    // of links to it drops to zero, it will destruct itself and\n    // deallocate the memory.\n    ~FixedArray()\n    { internal::GradientIndex<IsActive>::unregister(length_); }\n\n    // -------------------------------------------------------------------\n    // FixedArray: 3. 
Assignment operators\n    // -------------------------------------------------------------------\n\n    // Assignment to another matrix: copy the data...\n    // Ideally we would like this to fall back to the operator=(const\n    // Expression&) function, but if we don't define a copy assignment\n    // operator then C++ will generate a default one :-(\n    FixedArray& operator=(const FixedArray& rhs) {\n      *this = static_cast<const Expression<Type,FixedArray>&> (rhs);\n      return *this;\n    }\n\n    // Assignment to an array expression of the same rank\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == rank, FixedArray&>::type\n    inline\n    operator=(const Expression<EType,E>& rhs) {\n#ifndef ADEPT_NO_DIMENSION_CHECKING\n      ExpressionSize<rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"FixedArray size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (!internal::compatible(dims, dimensions())) {\n\tstd::string str = \"Expr\";\n\tstr += dims.str() + \" object assigned to \" + expression_string_();\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n#endif\n      // Select active/passive version by delegating to a protected\n      // function\n      assign_expression_<rank, IsActive, E::is_active>(rhs);\n\n      return *this;\n    }\n\n    // Assignment to a single value copies to every element\n    template <typename RType>\n    typename internal::enable_if<internal::is_not_expression<RType>::value, FixedArray&>::type\n    operator=(RType rhs) {\n      assign_inactive_scalar_<rank,IsActive>(rhs);\n      return *this;\n    }\n\n    // Assign active scalar expression to an active array by first\n    // converting the RHS to an active scalar\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == 0 && (rank > 0) && IsActive && !E::is_lvalue,\n      FixedArray&>::type\n    
operator=(const Expression<EType,E>& rhs) {\n      Active<EType> x = rhs;\n      *this = x;\n      return *this;\n    }\n\n    // Assign an active scalar to an active array\n    template <typename PType>\n    FixedArray& \n    operator=(const Active<PType>& rhs) {\n      ADEPT_STATIC_ASSERT(IsActive, ATTEMPT_TO_ASSIGN_ACTIVE_SCALAR_TO_INACTIVE_FIXED_ARRAY);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_inactive_scalar_<rank,IsActive>(rhs.scalar_value());\n\treturn *this;\n      }\n#endif\n      // In case PType != Type we make a local copy to minimize type\n      // conversions\n      Type val = rhs.scalar_value();\n\t\n      ADEPT_ACTIVE_STACK->check_space(length_);\n      for (Index i = 0; i < length_; ++i) {\n\tdata_[i] = val;\n\tADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index());\n\tADEPT_ACTIVE_STACK->push_lhs(gradient_index()+i);\n      }\n\n      return *this;\n    }\n    \n#define ADEPT_DEFINE_OPERATOR(OPERATOR, OPSYMBOL)\t\t\\\n    template <class RType>\t\t\t\t\t\\\n    FixedArray& OPERATOR(const RType& rhs) {\t\t\t\\\n      return *this = noalias(*this OPSYMBOL rhs);\t\t\\\n    }\n    ADEPT_DEFINE_OPERATOR(operator+=, +)\n    ADEPT_DEFINE_OPERATOR(operator-=, -)\n    ADEPT_DEFINE_OPERATOR(operator*=, *)\n    ADEPT_DEFINE_OPERATOR(operator/=, /)\n  //    ADEPT_DEFINE_OPERATOR(operator&=, &);\n  //    ADEPT_DEFINE_OPERATOR(operator|=, |);\n#undef ADEPT_DEFINE_OPERATOR\n\n    // Enable the A.where(B) = C construct.\n  \n    // Firstly implement the A.where(B) to return a \"Where<A,B>\" object\n    template <class B>\n    typename internal::enable_if<B::rank == rank, internal::Where<FixedArray,B> >::type\n    where(const Expression<bool,B>& bool_expr) {\n#ifndef ADEPT_NO_DIMENSION_CHECKING\n      ExpressionSize<rank> dims;\n      if (!bool_expr.get_dimensions(dims)) {\n\tstd::string str = \"FixedArray size mismatch in \"\n\t  + bool_expr.expression_string() + \".\";\n\tthrow size_mismatch(str 
ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dims != dimensions()) {\n\tthrow size_mismatch(\"Boolean expression of different size\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n#endif\n      return internal::Where<FixedArray,B>(*this, bool_expr.cast());\n    }\n    \n    // When Where<A,B> = C is invoked, it calls\n    // A.assign_conditional(B,C). This is implemented separately for\n    // the case when C is an inactive scalar and when it is an array\n    // expression.\n    template <class B, typename C>\n    typename internal::enable_if<internal::is_not_expression<C>::value, void>::type\n    assign_conditional(const Expression<bool,B>& bool_expr,\n\t\t\t    C rhs) {\n      if (!empty()) {\n\tassign_conditional_inactive_scalar_<IsActive>(bool_expr, rhs);\n      }\n    }\n    \n    template <class B, typename T, class C>\n    void assign_conditional(const Expression<bool,B>& bool_expr,\n\t\t\t    const Expression<T,C>& rhs) {\n#ifndef ADEPT_NO_DIMENSION_CHECKING\n      // Assume size of bool_expr already checked\n      ExpressionSize<rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"FixedArray size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (!internal::compatible(dims,dimensions())) {\n\tthrow size_mismatch(\"Right-hand-side of \\\"where\\\" construct of incompatible size\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n#endif\n      // Select active/passive version by delegating to a\n      // protected function\n      assign_conditional_<IsActive>(bool_expr.cast(), rhs.cast());\n      //      return *this;\n    }\n\n#ifdef ADEPT_CXX11_FEATURES\n    // Assignment of a FixedArray to an initializer list; the first ought\n    // to only work for vectors\n    template <typename T>\n    typename internal::enable_if<std::is_convertible<T,Type>::value, FixedArray&>::type\n    operator=(std::initializer_list<T> list) {\n      
ADEPT_STATIC_ASSERT(rank==1, RANK_MISMATCH_IN_INITIALIZER_LIST);\n\n      if (list.size() > J0) {\n\tthrow size_mismatch(\"Initializer list is larger than Vector in assignment\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      // Zero the whole array first in order that automatic\n      // differentiation works\n      *this = 0;\n      Index index = 0;\n      for (auto i = std::begin(list); i < std::end(list); ++i,\n\t   ++index) {\n\tdata_[index*offset_<0>::value] = *i;\t\n      }\n      return *this;\n    }\n\n    // Assignment of a higher rank Array to a list of lists...\n    template <class IType>\n    FixedArray& operator=(std::initializer_list<std::initializer_list<IType> > list) {\n      ADEPT_STATIC_ASSERT(rank==internal::initializer_list_rank<IType>::value+2,\n      \t\t\t  RANK_MISMATCH_IN_INITIALIZER_LIST);\n      if (list.size() > J0) {\n\tthrow size_mismatch(\"Multi-dimensional initializer list larger than slowest-varying dimension of Array\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      // Zero the whole array first in order that automatic\n      // differentiation works\n      *this = 0;\n\n      // Enact the assignment using the Array version\n      inactive_link() = list;\n      return *this;\n    }\n#endif\n  \n    // -------------------------------------------------------------------\n    // FixedArray: 4. 
Access functions, particularly operator()\n    // -------------------------------------------------------------------\n  \n    // Get l-value of the element at the specified coordinates\n    typename internal::active_reference<Type,IsActive>::type\n    get_lvalue(const ExpressionSize<rank>& i) {\n      return get_lvalue_<IsActive>(index_(i));\n    }\n    \n    typename internal::active_scalar<Type,IsActive>::type\n    get_rvalue(const ExpressionSize<rank>& i) const {\n      return get_rvalue_<IsActive>(index_(i));\n    }\n\n  protected:\n    template <bool MyIsActive>\n    typename internal::enable_if<MyIsActive, ActiveReference<Type> >::type\n    get_lvalue_(const Index& loc) {\n      return ActiveReference<Type>(data_[loc], gradient_index()+loc);\n    }\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive, Type&>::type\n    get_lvalue_(const Index& loc) {\n      return data_[loc];\n    }\n\n    template <bool MyIsActive>\n    typename internal::enable_if<MyIsActive, Active<Type> >::type\n    get_rvalue_(const Index& loc) const {\n      return Active<Type>(data_[loc], gradient_index()+loc);\n    }\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive, const Type&>::type\n    get_rvalue_(const Index& loc) const {\n      return data_[loc];\n    }\n\n  public:\n    // Get a constant reference to the element at the specified\n    // location, ignoring whether it is active or not\n    //    const Type& get(const ExpressionSize<rank>& i) const {\n    //      return data_[index_(i)];\n    //    }\n\n    // The following provide a way to access individual elements of\n    // the array.  There must be the same number of arguments to\n    // operator() as the rank of the array.  Each argument must be of\n    // integer type, or a rank-0 expression of integer type (such as\n    // \"end\" or \"end-3\"). 
Inactive arrays return a reference to the\n    // element, while active arrays return an ActiveReference<Type>\n    // object.  Up to 7 dimensions are supported.\n\n    // l-value access to inactive array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, Type&>::type\n    operator()(I0 i0) \n    { return data_[internal::get_index_with_len(i0,J0)]; }\n    \n    // r-value access to inactive array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, const Type&>::type\n    operator()(I0 i0) const\n    { return data_[internal::get_index_with_len(i0,J0)]; }\n\n    // l-value access to inactive array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, Type&>::type\n    operator[](I0 i0) \n    { return data_[internal::get_index_with_len(i0,J0)]; }\n\n    // r-value access to inactive array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && !IsActive, const Type&>::type\n    operator[](I0 i0) const\n    { return data_[internal::get_index_with_len(i0,J0)]; }\n \n  protected:\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive,Type&>::type\n    get_scalar_reference(const Index& offset)\n    { return data_[offset]; }\n\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive,const Type&>::type\n    get_scalar_reference(const Index& offset) const\n    { return data_[offset]; }\n\n    template <bool MyIsActive>\n    typename internal::enable_if<MyIsActive,ActiveReference<Type> >::type\n    get_scalar_reference(const Index& offset) \n    { return ActiveReference<Type>(data_[offset], gradient_index()+offset); }\n    template <bool 
MyIsActive>\n    typename internal::enable_if<MyIsActive,ActiveConstReference<Type> >::type\n    get_scalar_reference(const Index& offset) const\n    { return ActiveConstReference<Type>(data_[offset], gradient_index()+offset); }\n\n  public:\n\n    // l-value access to active array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveReference<Type> >::type\n    operator()(I0 i0) {\n      Index offset = internal::get_index_with_len(i0,J0);\n      return ActiveReference<Type>(data_[offset], gradient_index()+offset);\n    }\n    \n    // r-value access to active array with function-call operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveConstReference<Type> >::type\n    operator()(I0 i0) const {\n      Index offset = internal::get_index_with_len(i0,J0);\n      return ActiveConstReference<Type>(data_[offset], gradient_index()+offset);\n    }\n  \n    // l-value access to active array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveReference<Type> >::type\n    operator[](I0 i0) {\n      Index offset = internal::get_index_with_len(i0,J0);\n      return ActiveReference<Type>(data_[offset], gradient_index()+offset);\n    }\n    \n    // r-value access to active array with element-access operator\n    template <typename I0>\n    typename internal::enable_if<rank==1 && internal::all_scalar_ints<1,I0>::value && IsActive,\n\t\t       ActiveConstReference<Type> >::type\n    operator[](I0 i0) const {\n      Index offset = internal::get_index_with_len(i0,J0);\n      return ActiveConstReference<Type>(data_[offset], gradient_index()+offset);\n    }\n      \n    // 2D array l-value and r-value access\n    template <typename I0, typename I1>\n    
typename internal::enable_if<rank==2 && internal::all_scalar_ints<2,I0,I1>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1) {\n      return get_scalar_reference<IsActive>(\n\t\t    internal::get_index_with_len(i0,J0)*J1\n\t\t  + internal::get_index_with_len(i1,J1));\n    }\n    template <typename I0, typename I1>\n    typename internal::enable_if<rank==2 && internal::all_scalar_ints<2,I0,I1>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1) const {\n      return get_scalar_reference<IsActive>(\n\t\t    internal::get_index_with_len(i0,J0)*J1\n\t\t  + internal::get_index_with_len(i1,J1));\n    }\n  \n    // 3D array l-value and r-value access\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<rank==3 && internal::all_scalar_ints<3,I0,I1,I2>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2) {\n      return get_scalar_reference<IsActive>(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t+ internal::get_index_with_len(i1,J1))\n\t\t\t\t\t    + internal::get_index_with_len(i2,J2));\n    }\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<rank==3 && internal::all_scalar_ints<3,I0,I1,I2>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2) const {\n      return get_scalar_reference<IsActive>(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t+ internal::get_index_with_len(i1,J1))\n\t\t\t\t\t    + internal::get_index_with_len(i2,J2));\n    }\n\n    // 4D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<rank==4 && internal::all_scalar_ints<4,I0,I1,I2,I3>::value,\n\t\t       typename 
internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) {\n      return get_scalar_reference<IsActive>(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t    + internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t+ internal::get_index_with_len(i2,J2))\n\t\t\t\t\t    + internal::get_index_with_len(i3,J3));\n    }\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<rank==4 && internal::all_scalar_ints<4,I0,I1,I2,I3>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) const {\n      return get_scalar_reference<IsActive>(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t    + internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t+ internal::get_index_with_len(i2,J2))\n\t\t\t\t\t    + internal::get_index_with_len(i3,J3));\n    }\n\n    // 5D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<rank==5 && internal::all_scalar_ints<5,I0,I1,I2,I3,I4>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) {\n      return get_scalar_reference<IsActive>(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t\t+ internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t    + internal::get_index_with_len(i2,J2))\n\t\t\t\t\t\t+ internal::get_index_with_len(i3,J3))\n\t\t\t\t\t    + internal::get_index_with_len(i4,J4));\n    }\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<rank==5 && internal::all_scalar_ints<5,I0,I1,I2,I3,I4>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const {\n      return 
get_scalar_reference<IsActive>(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t\t+ internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t    + internal::get_index_with_len(i2,J2))\n\t\t\t\t\t\t+ internal::get_index_with_len(i3,J3))\n\t\t\t\t\t    + internal::get_index_with_len(i4,J4));\n    }\n\n    // 6D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<rank==6 && internal::all_scalar_ints<6,I0,I1,I2,I3,I4,I5>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) {\n      return get_scalar_reference<IsActive>(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t\t    + internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t\t+ internal::get_index_with_len(i2,J2))\n\t\t\t\t\t\t    + internal::get_index_with_len(i3,J3))\n\t\t\t\t\t\t+ internal::get_index_with_len(i4,J4))\n\t\t\t\t\t    + internal::get_index_with_len(i5,J5));\n    }\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<rank==6 && internal::all_scalar_ints<6,I0,I1,I2,I3,I4,I5>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const {\n      return get_scalar_reference<IsActive>(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t\t    + internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t\t+ internal::get_index_with_len(i2,J2))\n\t\t\t\t\t\t    + internal::get_index_with_len(i3,J3))\n\t\t\t\t\t\t+ internal::get_index_with_len(i4,J4))\n\t\t\t\t\t    + internal::get_index_with_len(i5,J5));\n    }\n\n    // 7D array l-value and r-value access\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename 
internal::enable_if<rank==7 && internal::all_scalar_ints<7,I0,I1,I2,I3,I4,I5,I6>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) {\n      return get_scalar_reference<IsActive>(J6*(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t\t\t+ internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t\t    + internal::get_index_with_len(i2,J2))\n\t\t\t\t\t\t\t+ internal::get_index_with_len(i3,J3))\n\t\t\t\t\t\t    + internal::get_index_with_len(i4,J4))\n\t\t\t\t\t\t+ internal::get_index_with_len(i5,J5))\n\t\t\t\t\t    + internal::get_index_with_len(i6,J6));\n    }\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<rank==7 && internal::all_scalar_ints<7,I0,I1,I2,I3,I4,I5,I6>::value,\n\t\t       typename internal::active_const_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const {\n      return get_scalar_reference<IsActive>(J6*(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0)\n\t\t\t\t\t\t\t\t+ internal::get_index_with_len(i1,J1))\n\t\t\t\t\t\t\t    + internal::get_index_with_len(i2,J2))\n\t\t\t\t\t\t\t+ internal::get_index_with_len(i3,J3))\n\t\t\t\t\t\t    + internal::get_index_with_len(i4,J4))\n\t\t\t\t\t\t+ internal::get_index_with_len(i5,J5))\n\t\t\t\t\t    + internal::get_index_with_len(i6,J6));\n    }\n   \n\n    // The following define the case when operator() is called and one\n    // of the arguments is a \"range\" object (an object that describes\n    // a range of indices that are either contiguous or separated by a\n    // fixed stride), while all others are of integer type (or a\n    // rank-0 expression of integer type). An Array object is returned\n    // with a rank that may be reduced from that of the original\n    // array, by one for each dimension that was indexed by an\n    // integer. 
The new array points to a subset of the original data,\n    // so modifying it will modify the original array.\n\n    // First the case of a vector where we know the argument must be a\n    // \"range\" object\n    template <typename I0>\n    typename internal::enable_if<internal::is_ranged<rank,I0>::value,\n\t\t       Array<1,Type,IsActive> >::type\n    operator()(I0 i0) {\n      ExpressionSize<1> new_dim((i0.end(J0) + i0.stride(J0) - i0.begin(J0))\n\t\t\t\t/i0.stride(J0));\n      ExpressionSize<1> new_offset(i0.stride(J0));\n      return Array<1,Type,IsActive>(data_, i0.begin(J0), new_dim, new_offset,\n\t\t\t\t    internal::GradientIndex<IsActive>::get());\n    }\n    template <typename I0>\n    typename internal::enable_if<internal::is_ranged<rank,I0>::value,\n\t\t       const Array<1,Type,IsActive> >::type\n    operator()(I0 i0) const {\n      ExpressionSize<1> new_dim((i0.end(J0) + i0.stride(J0) - i0.begin(J0))\n\t\t\t\t/i0.stride(J0));\n      ExpressionSize<1> new_offset(i0.stride(J0));\n      return Array<1,Type,IsActive>(data_, i0.begin(J0), new_dim, new_offset,\n\t\t\t\t    internal::GradientIndex<IsActive>::get());\n    }\n\n  private:\n    // For multi-dimensional arrays, we need a helper function\n\n    // Treat the indexing of dimension \"irank\" in the case that the\n    // index is of integer type\n    template <int Rank, typename T, int NewRank>\n    typename internal::enable_if<internal::is_scalar_int<T>::value, void>::type\n    update_index(const T& i, Index& inew_rank, Index& ibegin,\n\t\t ExpressionSize<NewRank>& new_dim, \n\t\t ExpressionSize<NewRank>& new_offset) const {\n      ibegin += internal::get_index_with_len(i,dimension_<Rank>::value)*offset_<Rank>::value;\n    }\n\n    // Treat the indexing of dimension \"irank\" in the case that the\n    // index is a \"range\" object\n    template <int Rank, typename T, int NewRank>\n    typename internal::enable_if<internal::is_range<T>::value, void>::type\n    update_index(const T& i, Index& 
inew_rank, Index& ibegin,\n\t\t ExpressionSize<NewRank>& new_dim, \n\t\t ExpressionSize<NewRank>& new_offset) const {\n      ibegin += i.begin(dimension_<Rank>::value)*offset_<Rank>::value;\n      new_dim[inew_rank]\n      = (i.end(dimension_<Rank>::value)\n\t + i.stride(dimension_<Rank>::value)-i.begin(dimension_<Rank>::value))\n      / i.stride(dimension_<Rank>::value);\n      new_offset[inew_rank] = i.stride(dimension_<Rank>::value)*offset_<Rank>::value;\n      ++inew_rank;\n    }\n  \n  public:\n\n    // Now the individual overloads for each number of arguments, up\n    // to 7, with separate r-value (const) and l-value (non-const)\n    // versions\n    template <typename I0, typename I1>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1>::value,\n\t\t       Array<internal::is_ranged<rank,I0,I1>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1) {\n      static const int new_rank = internal::is_ranged<rank,I0,I1>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1>::value,\n\t\t       const Array<internal::is_ranged<rank,I0,I1>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1) const {\n      static const int new_rank = internal::is_ranged<rank,I0,I1>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      return 
Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2>::value,\n\t       Array<internal::is_ranged<rank,I0,I1,I2>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2) {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2>::value,\n\t       const Array<internal::is_ranged<rank,I0,I1,I2>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2) const {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3>::value,\n       Array<internal::is_ranged<rank,I0,I1,I2,I3>::count,Type,IsActive> 
>::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3>::value,\n       const Array<internal::is_ranged<rank,I0,I1,I2,I3>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3) const {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3,I4>::value,\n       Array<internal::is_ranged<rank,I0,I1,I2,I3,I4>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) {\n      static const int new_rank = 
internal::is_ranged<rank,I0,I1,I2,I3,I4>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n  \n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3,I4>::value,\n       const Array<internal::is_ranged<rank,I0,I1,I2,I3,I4>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3,I4>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n  \n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5>::value,\n       Array<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5>::count,Type,IsActive> >::type\n     
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3,I4,I5>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5>::value,\n       const Array<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5>::count,Type,IsActive> >::type\n     operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3,I4,I5>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, 
typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5,I6>::value,\n       Array<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5,I6>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3,I4,I5,I6>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset);\n      update_index<6>(i6, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n\n    template <typename I0, typename I1, typename I2, typename I3,\n\t      typename I4, typename I5, typename I6>\n    typename internal::enable_if<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5,I6>::value,\n       const Array<internal::is_ranged<rank,I0,I1,I2,I3,I4,I5,I6>::count,Type,IsActive> >::type\n    operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const {\n      static const int new_rank = internal::is_ranged<rank,I0,I1,I2,I3,I4,I5,I6>::count;\n      ExpressionSize<new_rank> new_dim;\n      ExpressionSize<new_rank> new_offset;\n      Index inew_rank = 0;\n      Index ibegin = 0;\n      update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset);\n      update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset);\n      update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset);\n      
update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset);\n      update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset);\n      update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset);\n      update_index<6>(i6, inew_rank, ibegin, new_dim, new_offset);\n      return Array<new_rank,Type,IsActive>(data_, ibegin, new_dim, new_offset,\n\t\t\t\t\t   internal::GradientIndex<IsActive>::get());\n    }\n  \n    // If one or more of the indices is not guaranteed to be monotonic\n    // at compile time then we must return an IndexedArray, now done\n    // for all possible numbers of arguments\n\n    // Indexing a 1D array\n    template <typename I0>\n    typename internal::enable_if<rank == 1 && internal::is_int_vector<I0>::value\n\t\t       && !internal::is_ranged<rank,I0>::value,\n\t\t       internal::IndexedArray<rank,Type,IsActive,FixedArray,I0> >::type\n    operator()(const I0& i0) {\n      return internal::IndexedArray<rank,Type,IsActive,FixedArray,I0>(*this, i0);\n    }\n    template <typename I0>\n    typename internal::enable_if<rank == 1 && internal::is_int_vector<I0>::value\n\t\t       && !internal::is_ranged<rank,I0>::value,\n\t\t       const internal::IndexedArray<rank,Type,IsActive,\n\t\t\t\t\t  FixedArray,I0> >::type\n    operator()(const I0& i0) const {\n      return internal::IndexedArray<rank,Type,IsActive,\n\t\t\t  FixedArray,I0>(*const_cast<FixedArray*>(this), i0);\n    }\n  \n    // Indexing a 2D array\n    template <typename I0, typename I1>\n    typename internal::enable_if<rank == 2 && internal::is_irreg_indexed<rank,I0,I1>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1>::count,\n\t\t\t\t    Type,IsActive,FixedArray,I0,I1> >::type\n    operator()(const I0& i0, const I1& i1) {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1>(*this, i0, i1);\n    }\n    template <typename I0, typename I1>\n    
typename internal::enable_if<rank == 2 && internal::is_irreg_indexed<rank,I0,I1>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1>::count,\n\t\t\t\t    Type,IsActive,FixedArray,I0,I1> >::type\n    operator()(const I0& i0, const I1& i1) const {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,\n\t\t\t  FixedArray,I0,I1>(*const_cast<FixedArray*>(this), i0, i1);\n    }\n\n    // Indexing a 3D array\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<rank == 3 && internal::is_irreg_indexed<rank,I0,I1,I2>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2>::count,\n\t\t\t\t    Type,IsActive,FixedArray,I0,I1,I2> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2) {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,\n\t\t\t  I0,I1,I2>(*this, i0, i1, i2);\n    }\n    template <typename I0, typename I1, typename I2>\n    typename internal::enable_if<rank == 3 && internal::is_irreg_indexed<rank,I0,I1,I2>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<rank,\n\t\t\t\t\t\t\t   I0,I1,I2>::count,\n\t\t\t\t    Type,IsActive,FixedArray,I0,I1,I2> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2) const {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,\n\t\t\t  I0,I1,I2>(*const_cast<FixedArray*>(this), i0, i1, i2);\n    }\n\n    // Indexing a 4D array\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<rank == 4 && internal::is_irreg_indexed<rank,I0,I1,I2,I3>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,I3>::count,\n\t\t\t\t    
Type,IsActive,FixedArray,I0,I1,I2,I3> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,\n\t\t\t  I0,I1,I2,I3>(*this, i0, i1, i2, i3);\n    }\n    template <typename I0, typename I1, typename I2, typename I3>\n    typename internal::enable_if<rank == 4 && internal::is_irreg_indexed<rank,I0,I1,I2,I3>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,\n\t\t\t\t\t\t\t   I2,I3>::count,\n\t\t\t\t    Type,IsActive,FixedArray,I0,I1,I2,I3> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,\n\t\t\t  I3>(*const_cast<FixedArray*>(this), i0, i1, i2, i3);\n    }\n\n    // Indexing a 5D array\n    template <typename I0, typename I1, typename I2, typename I3, typename I4>\n    typename internal::enable_if<rank == 5\n\t\t       && internal::is_irreg_indexed<rank,I0,I1,I2,I3,I4>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,\n\t\t\t\t\t\t     I3,I4>::count,\n\t\t\t    Type,IsActive,FixedArray,I0,I1,I2,I3,I4> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4) {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,I3,\n\t\t\t  I4>(*this, i0, i1, i2, i3, i4);\n    }\n    template <typename I0, typename I1, typename I2, typename I3, typename I4>\n    typename internal::enable_if<rank == 5\n\t\t       && internal::is_irreg_indexed<rank,I0,I1,I2,I3,I4>::value,\n\t\t       const 
internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,\n\t\t\t\t\t\t\t   I3,I4>::count,\n\t\t\t\t  Type,IsActive,FixedArray,I0,I1,I2,I3,I4> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4) const {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,I3,\n\t\t\t  I4>(*const_cast<FixedArray*>(this), i0, i1, i2, i3, i4);\n    }\n\n    // Indexing a 6D array\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5>\n    typename internal::enable_if<rank == 6\n\t\t       && internal::is_irreg_indexed<rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t\t   I4,I5>::count,\n\t\t\t  Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4, const I5& i5) {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,I3,I4,\n\t\t\t  I5>(*this,i0,i1,i2,i3,i4,i5);\n    }\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5>\n    typename internal::enable_if<rank == 6\n\t\t       && internal::is_irreg_indexed<rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t\t   I4,I5>::count,\n\t\t\t  Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, \n\t       const I3& i3, const I4& i4, const I5& i5) const {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5>::count;\n      return 
internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,I3,I4,\n\t\t\t  I5>(*const_cast<FixedArray*>(this),i0,i1,i2,i3,i4,i5);\n    }\n\n    // Indexing a 7D array\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5, typename I6>\n    typename internal::enable_if<rank == 7\n\t\t       && internal::is_irreg_indexed<rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t     I4,I5,I6>::count,\n\t\t\t  Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5,I6> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3,\n\t       const I4& i4, const I5& i5, const I6& i6) {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5,I6>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5,\n\t\t\t  I6>(*this,i0,i1,i2,i3,i4,i5,i6);\n    }\n    template <typename I0, typename I1, typename I2,\n\t      typename I3, typename I4, typename I5, typename I6>\n    typename internal::enable_if<rank == 7\n\t\t       && internal::is_irreg_indexed<rank,I0,I1,I2,I3,I4,I5>::value,\n\t\t       const internal::IndexedArray<internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t\t   I4,I5,I6>::count,\n\t\t\t  Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5,I6> >::type\n    operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3,\n\t       const I4& i4, const I5& i5, const I6& i6) const {\n      static const int new_rank = internal::is_irreg_indexed<rank,I0,I1,I2,I3,\n\t\t\t\t\t\t   I4,I5,I6>::count;\n      return internal::IndexedArray<new_rank,Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5,\n\t\t\t  I6>(*const_cast<FixedArray*>(this),i0,i1,i2,i3,i4,i5,i6);\n    }\n\n\n    // Provide a C-array-like array access: for a multidimensional\n    // array, operator[](i), where i is of integer type, returns an\n    // array of rank one less than the original array, where 
the new\n    // array is \"sliced\" at index i of dimension 0.  For a vector,\n    // operator[](i) returns an l-value to the element at i.  Thus for\n    // a 3D array A, A[1][2][3] returns a single element. Note that\n    // this will be slower than A(1,2,3) because each operator[]\n    // creates a new array (although does not copy the data).\n    template <typename T>\n    typename internal::enable_if<internal::is_scalar_int<T>::value && (rank > 1),\n      Array<rank-1,Type,IsActive> >::type\n    operator[](T i) {\n      int index = internal::get_index_with_len(i,J0)*offset_<0>::value;\n      ExpressionSize<rank-1> new_dim;\n      ExpressionSize<rank-1> new_offset;\n      ExpressionSize<rank> dims = dimensions();\n      ExpressionSize<rank> offs = offset();\n      for (int j = 1; j < rank; ++j) {\n\tnew_dim[j-1] = dims[j];\n\tnew_offset[j-1] = offs[j];\n      }\n      return Array<rank-1,Type,IsActive>(data_, index, new_dim, new_offset,\n\t\t\t\t\t  internal::GradientIndex<IsActive>::get());\n    }\n    \n    // diag_matrix(), where *this is a 1D array, returns a DiagMatrix\n    // containing the data as the diagonal pointing to the original\n    // data, Can be used as an lvalue.  
Defined in SpecialMatrix.h\n    SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>, IsActive>\n    diag_matrix();\n    \n    Array<1,Type,IsActive>\n    diag_vector(Index offdiag = 0) {\n      ADEPT_STATIC_ASSERT(rank == 2, DIAG_VECTOR_ONLY_WORKS_ON_SQUARE_MATRICES);\n      if (empty()) {\n\t// Return an empty vector\n\treturn Array<1,Type,IsActive>();\n      }\n      else if (J0 != J1) {\n\tthrow invalid_operation(\"diag_vector member function only applicable to square matrices\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      else if (offdiag >= 0) {\n\tIndex new_dim = std::min(J0, J1-offdiag);\n\treturn Array<1,Type,IsActive>(data_, offset_<1>::value*offdiag,  \n\t\t\t\t      ExpressionSize<1>(new_dim),\n\t\t\t\t      ExpressionSize<1>(offset_<0>::value+offset_<1>::value),\n\t\t\t\t      internal::GradientIndex<IsActive>::get());\n      }\n      else {\n\tIndex new_dim = std::min(J0+offdiag, J1);\n\treturn Array<1,Type,IsActive>(data_,-offset_<0>::value*offdiag,  \n\t\t\t\t      ExpressionSize<1>(new_dim),\n\t\t\t\t      ExpressionSize<1>(offset_<0>::value+offset_<1>::value),\n\t\t\t\t      internal::GradientIndex<IsActive>::get());\n      }\n    }\n  \n\n    Array<2,Type,IsActive>\n    submatrix_on_diagonal(Index ibegin, Index iend) {\n      ADEPT_STATIC_ASSERT(rank == 2,\n\t\tSUBMATRIX_ON_DIAGONAL_ONLY_WORKS_ON_SQUARE_MATRICES);\n      if (J0 != J1) {\n\tthrow invalid_operation(\"submatrix_on_diagonal member function only applicable to square matrices\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      else if (ibegin < 0 || ibegin > iend || iend >= J0) {\n\tthrow index_out_of_bounds(\"Dimensions out of range in submatrix_on_diagonal\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\tIndex len = iend-ibegin+1;\n\tExpressionSize<2> dim(len,len);\n\treturn Array<2,Type,IsActive>(data_, ibegin*(offset_<0>::value + offset_<1>::value),\n\t\t\t\t      dim, offset(), internal::GradientIndex<IsActive>::get());\n      }\n    }\n\n    // For 
extracting contiguous sections out of an array use the\n    // following. Currently this just indexes each dimension with the\n    // contiguous range(a,b) index, but in future it may be optimized.\n\n    // 1D array subset\n    template <typename B0, typename E0>\n    Array<1,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0) {\n      ADEPT_STATIC_ASSERT(rank == 1,\n\t\t\t  SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY);\n      return (*this)(range(ibegin0,iend0));\n    }\n    template <typename B0, typename E0>\n    const Array<1,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0) const {\n      ADEPT_STATIC_ASSERT(rank == 1,\n\t\t\t  SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY);\n      return (*this)(range(ibegin0,iend0));\n    }\n\n    // 2D array subset\n    template <typename B0, typename E0, typename B1, typename E1>\n    Array<2,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1) {\n      ADEPT_STATIC_ASSERT(rank == 2,\n\t\t\t  SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1));\n    }\n    template <typename B0, typename E0, typename B1, typename E1>\n    const Array<2,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t  const B1& ibegin1, const E1& iend1) const {\n      ADEPT_STATIC_ASSERT(rank == 2,\n\t\t\t  SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1));\n    }\n\n    // 3D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2>\n    Array<3,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2) {\n      ADEPT_STATIC_ASSERT(rank == 3,\n\t\t\t  SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2));\n    }     \n    template 
<typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2>\n    const Array<3,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2) const {\n      ADEPT_STATIC_ASSERT(rank == 3,\n\t\t\t  SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2));\n    }\n\n    // 4D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3>\n    Array<4,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3) {\n      ADEPT_STATIC_ASSERT(rank == 4,\n\t\t\t  SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3>\n    const Array<4,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3) const {\n      ADEPT_STATIC_ASSERT(rank == 4,\n\t\t\t  SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3));\n    } \n\n    // 5D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4>\n    Array<5,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& 
ibegin4, const E4& iend4) {\n      ADEPT_STATIC_ASSERT(rank == 5,\n\t\t\t  SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4>\n    const Array<5,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4) const {\n      ADEPT_STATIC_ASSERT(rank == 5,\n\t\t\t  SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4));\n    }\n\n    // 6D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5>\n    Array<6,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5) {\n      ADEPT_STATIC_ASSERT(rank == 6,\n\t\t\t  SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5>\n    const Array<6,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   
const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5) const {\n      ADEPT_STATIC_ASSERT(rank == 6,\n\t\t\t  SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5));\n    }\n\n    // 7D array subset\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5,\n\t      typename B6, typename E6>\n    Array<7,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5,\n\t   const B6& ibegin6, const E6& iend6) {\n      ADEPT_STATIC_ASSERT(rank == 7,\n\t\t\t  SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5),\n\t\t     range(ibegin6,iend6));\n    }\n    template <typename B0, typename E0, typename B1, typename E1,\n\t      typename B2, typename E2, typename B3, typename E3,\n\t      typename B4, typename E4, typename B5, typename E5,\n\t      typename B6, typename E6>\n    const Array<7,Type,IsActive>\n    subset(const B0& ibegin0, const E0& iend0, \n\t   const B1& ibegin1, const E1& iend1,\n\t   const B2& ibegin2, const E2& iend2,\n\t   const B3& ibegin3, const E3& iend3,\n\t   const B4& ibegin4, const E4& iend4,\n\t   const B5& ibegin5, const E5& iend5,\n\t   const B6& ibegin6, const E6& iend6) const {\n      ADEPT_STATIC_ASSERT(rank == 7,\n\t\t\t  
SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY);\n      return (*this)(range(ibegin0,iend0),range(ibegin1,iend1),\n\t\t     range(ibegin2,iend2),range(ibegin3,iend3),\n\t\t     range(ibegin4,iend4),range(ibegin5,iend5),\n\t\t     range(ibegin6,iend6));\n    }\n\n    // -------------------------------------------------------------------\n    // FixedArray: 5. Public member functions\n    // -------------------------------------------------------------------\n  \n    // STL-like size() returns total length of array\n    Index size() const { return length_; }\n\n    bool get_dimensions_(ExpressionSize<rank>& dims) const {\n      dims[0] = J0;\n      if (J1 > 0) {\n\tdims[1] = J1;\n\tif (J2 > 0) {\n\t  dims[2] = J2;\n\t  if (J3 > 0) {\n\t    dims[3] = J3;\n\t    if (J4 > 0) {\n\t      dims[4] = J4;\n\t      if (J5 > 0) {\n\t\tdims[5] = J5;\n\t\tif (J6 > 0) {\n\t\t  dims[6] = J6;\n\t\t}\n\t      }\n\t    }\n\t  }\n\t}\n      }\n      return true;\n    }\n\n    // Return constant reference to dimensions\n    ExpressionSize<rank> dimensions() const {\n      ExpressionSize<rank> dims;\n      get_dimensions_(dims);\n      return dims;\n    }\n\n    // Return individual dimension\n    Index size(int j) const {\n      if (j >= rank)  { return  0; }\n      else if (j == 0) { return J0; }\n      else if (j == 1) { return J1; }\n      else if (j == 2) { return J2; }\n      else if (j == 3) { return J3; }\n      else if (j == 4) { return J4; }\n      else if (j == 5) { return J5; }\n      else { return J6; }\n    }\n    Index dimension(int j) const {\n      return size(j);\n    }\n\n    // Return individual offset\n    Index offset(int j) const {\n      if (j >= rank)  { return  0; }\n      else if (j == 0) { return offset_<0>::value; }\n      else if (j == 1) { return offset_<1>::value; }\n      else if (j == 2) { return offset_<2>::value; }\n      else if (j == 3) { return offset_<3>::value; }\n      else if (j == 4) { return offset_<4>::value; }\n      else if (j == 5) { return 
offset_<5>::value; }\n      else if (j == 6) { return offset_<6>::value; }\n      else { throw invalid_dimension(); }\n    }\n\n    // Return constant reference to offsets\n    ExpressionSize<rank> offset() const {\n      ExpressionSize<rank> offs;\n      offs[0] = offset_<0>::value;\n      if (J1 > 0) {\n\toffs[1] = offset_<1>::value;\n\tif (J2 > 0) {\n\t  offs[2] = offset_<2>::value;\n\t  if (J3 > 0) {\n\t    offs[3] = offset_<3>::value;\n\t    if (J4 > 0) {\n\t      offs[4] = offset_<4>::value;\n\t      if (J5 > 0) {\n\t\toffs[5] = offset_<5>::value;\n\t\tif (J6 > 0) {\n\t\t  offs[6] = offset_<6>::value;\n\t\t}\n\t      }\n\t    }\n\t  }\n\t}\n      }\n      return offs;\n    }\n\n    const Index& last_offset() const { return offset_<rank-1>::value; }\n\n    // Return true if the array is empty\n    bool empty() const { return (J0 == 0); }\n\n    // Return a string describing the array\n    std::string info_string() const {\n      std::stringstream str;\n      str << \"FixedArray<\" << rank << \">, dim=\" << dimensions() << \", data_location=\" << data_;\n      if (IsActive) {\n\tstr << \", gradient_index=\" << gradient_index();\n      }\n      return str.str();\n    }\n\n    // Return a pointer to the start of the data\n    Type* data() { return data_; }\n    const Type* data() const { return data_; }\n    const Type* const_data() const { return data_; }\n\n    // Older style\n    Type* data_pointer() { return data_; }\n    const Type* data_pointer() const { return data_; }\n    const Type* const_data_pointer() const { return data_; }\n\n    // For vectors only, we allow a pointer to be returned to a\n    // specified element\n    Type* data_pointer(Index i) { \n      ADEPT_STATIC_ASSERT(rank == 1, CAN_ONLY_USE_DATA_POINTER_WITH_INDEX_ON_VECTORS);\n      if (data_) {\n\treturn data_ + i;\n      }\n      else {\n\treturn 0;\n      }\n    }\n    const Type* const_data_pointer(Index i) const { \n      ADEPT_STATIC_ASSERT(rank == 1, 
CAN_ONLY_USE_CONST_DATA_POINTER_WITH_INDEX_ON_VECTORS);\n      if (data_) {\n\treturn data_ + i;\n      }\n      else {\n\treturn 0;\n      }\n    }\n   \n    bool is_aliased_(const Type* mem1, const Type* mem2) const {\n      Type const * ptr_begin;\n      Type const * ptr_end;\n      data_range(ptr_begin, ptr_end);\n      if (ptr_begin <= mem2 && ptr_end >= mem1) {\n\treturn true;\n      }\n      else {\n\treturn false;\n      }\n    }\n\n    // By design, FixedArrays are row-major and row-wise access is\n    // contiguous\n    bool all_arrays_contiguous_() const { return true; }\n \n    bool is_aligned_() const {\n      return !(reinterpret_cast<std::size_t>(data_) & Packet<Type>::align_mask);\n    }\n\n    template <int n>\n    int alignment_offset_() const {\n      return (reinterpret_cast<std::size_t>(data_)/sizeof(Type)) % n; \n    }\n\n    Type value_with_len_(const Index& j, const Index& len) const {\n      ADEPT_STATIC_ASSERT(rank == 1, CANNOT_USE_VALUE_WITH_LEN_ON_ARRAY_OF_RANK_OTHER_THAN_1);\n      return data_[j];\n    }\n\n    std::string expression_string_() const {\n      if (true) {\n\tstd::string a = internal::fixed_array_helper<rank,IsActive>().name();\n\ta += dimensions().str();\n\treturn a;\n      }\n      else {\n\tstd::stringstream s;\n\tprint(s);\n\treturn s.str();\n      }\n    }\n\n    // The same as operator=(inactive scalar) but does not put\n    // anything on the stack\n    template <typename RType>\n    typename internal::enable_if<internal::is_not_expression<RType>::value, FixedArray&>::type\n    set_value(RType x) {\n      if (!empty()) {\n\tassign_inactive_scalar_<rank,false>(x);\n      }\n      return *this;\n    }\n  \n    \n    // Return the gradient index for the first element in the array,\n    // or -1 if not active\n    Index gradient_index() const {\n      return internal::GradientIndex<IsActive>::get();\n    }\n\n    std::ostream& print(std::ostream& os) const {\n      const Array<rank,Type,IsActive> x(*this);\n      
x.print(os);\n      return os;\n    }\n\n    // Get pointers to the first and last data members in memory.  \n    void data_range(Type const * &data_begin, Type const * &data_end) const {\n      data_begin = data_;\n      data_end = data_ + length_-1;\n    }\n\n  \n    // The Stack::independent(x) and Stack::dependent(y) functions add\n    // the gradient_index of objects x and y to std::vector<uIndex>\n    // objects in Stack. Since x and y may be scalars or arrays, this\n    // is best done by delegating to the Active or FixedArray classes.\n    template <typename IndexType>\n    void push_gradient_indices(std::vector<IndexType>& vec) const {\n      ADEPT_STATIC_ASSERT(IsActive,\n\t\t  CANNOT_PUSH_GRADIENT_INDICES_FOR_INACTIVE_ARRAY); \n      ExpressionSize<rank> i(0);\n      Index gradient_ind = gradient_index();\n      Index index = 0;\n      int my_rank;\n      vec.reserve(vec.size() + size());\n      do {\n\t// Innermost loop - note that the counter is index, not max_index\n\tfor (Index max_index = index + dimension_<rank-1>::value*offset_<rank-1>::value;\n\t     index < max_index;\n\t     index += offset_<rank-1>::value) {\n\t  vec.push_back(gradient_ind + index);\n\t}\n\t// Increment counters appropriately depending on which\n\t// dimensions have been finished\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    // Return inactive array linked to original data\n    Array<rank, Type, false> inactive_link() {\n      return Array<rank, Type, false>(data_, 0, dimensions(), offset(),\n\t\t\t\t       internal::GradientIndex<IsActive>::get());\n    }\n\n    // Transpose helper functions\n  protected:\n    template<int MyRank>\n    typename internal::enable_if<MyRank == 2, Array<2,Type,IsActive> >::type\n    my_T() {\n      // Transpose 2D array: create output array initially as link\n      // to input array\n      Array<2,Type,IsActive> out(*this);\n      // Swap dimensions\n      return out.in_place_transpose();\n    }\n    
template<int MyRank>\n    typename internal::enable_if<MyRank == 2, const Array<2,Type,IsActive> >::type\n    my_T() const {\n      // Transpose 2D array: create output array initially as link\n      // to input array\n      Array<2,Type,IsActive> out(const_cast<FixedArray&>(*this));\n      // Swap dimensions\n      return out.in_place_transpose();\n    }\n\n  public:\n    // Out-of-place transpose\n    Array<2,Type,IsActive>\n    T() {\n      ADEPT_STATIC_ASSERT(rank == 1 || rank == 2, \n\t\t\t  TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS);\n      return my_T<rank>();\n    }\n    const Array<2,Type,IsActive>\n    T() const {\n      ADEPT_STATIC_ASSERT(rank == 1 || rank == 2, \n\t\t\t  TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS);\n      return my_T<rank>();\n    }\n\n    // \"permute\" is a generalized transpose, returning an FixedArray linked\n    // to the current one but with the dimensions rearranged according\n    // to idim: idim[0] is the 0-based number of the dimension of the\n    // current array that will be dimension 0 of the new array,\n    // idim[1] is the number of the dimension of the current array\n    // that will be dimension 1 of the new array and so on.\n    Array<rank,Type,IsActive> permute(const Index* idim) {\n      if (empty()) {\n\tthrow empty_array(\"Attempt to permute an empty array\"\n\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      ExpressionSize<rank> new_dims(0);\n      ExpressionSize<rank> new_offset;\n      ExpressionSize<rank> dims, offs;\n      dims = dimensions();\n      offs = offset();\n      for (int i = 0; i < rank; ++i) {\n\tif (idim[i] >= 0 && idim[i] < rank) {\n\t  new_dims[i] = dims[idim[i]];\n\t  new_offset[i] = offs[idim[i]];\n\t}\n\telse {\n\t  throw invalid_dimension(\"Dimensions must be in range 0 to rank-1 in permute\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      for (int i = 0; i < rank; ++i) {\n\tif (new_dims[i] == 0) {\n\t  throw invalid_dimension(\"Missing dimension in permute\"\n\t\t\t\t  
ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      return Array<rank,Type,IsActive>(data_, 0, new_dims, new_offset,\n\t\t\t\t\tinternal::GradientIndex<IsActive>::get());\n    }\n\n    Array<rank,Type,IsActive> permute(const ExpressionSize<rank>& idim) {\n      return permute(&idim[0]);\n    }\n\n    // Up to 7 dimensions we can specify the dimensions as separate\n    // arguments\n    typename internal::enable_if<(rank < 7), Array<rank,Type,IsActive> >::type\n    permute(Index i0, Index i1, Index i2 = -1, Index i3 = -1, Index i4 = -1,\n\t    Index i5 = -1, Index i6 = -1) {\n      Index idim[7] = {i0, i1, i2, i3, i4, i5, i6};\n      for (int i = 0; i < rank; ++i) {\n\tif (idim[i] == -1) {\n\t  throw invalid_dimension(\"Incorrect number of dimensions provided to permute\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      return permute(idim);\n    }\n\n    // Return an inactive array of the same type and rank as the\n    // present active fixed array, containing the gradients associated\n    // with it\n    template <typename MyType>\n    void get_gradient(Array<rank,MyType,false>& gradient) const {\n      ADEPT_STATIC_ASSERT(IsActive,CANNOT_USE_GET_GRADIENT_ON_INACTIVE_ARRAY);\n      if (gradient.empty()) {\n\tgradient.resize(dimensions());\n      }\n      else if (gradient.dimensions() != dimensions()) {\n\tthrow size_mismatch(\"Attempt to get_gradient with array of different dimensions\"\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\n      }\n      static const int last = rank-1;\n      ExpressionSize<rank> target_offset = gradient.offset();\n      ExpressionSize<rank> i(0);\n      Index index = 0;\n      int my_rank;\n      Index index_target = 0;\n      Index last_dim_stretch = dimension_<rank-1>::value*offset_<rank-1>::value;\n      MyType* target = gradient.data();\n      do {\n\ti[last] = 0;\n\tindex_target = 0;\n\tfor (int r = 0; r < rank-1; r++) {\n\t  index_target += 
i[r]*target_offset[r];\n\t}\n\tADEPT_ACTIVE_STACK->get_gradients(gradient_index()+index,\n\t\t\t\t  gradient_index()+index+last_dim_stretch,\n\t\t\t\t\t  target+index_target, offset_<rank-1>::value, target_offset[last]);\n\tindex += last_dim_stretch;\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n    // Return an inactive array of the same type and rank as the\n    // present active array containing the gradients associated with\n    // it\n    Array<rank,Type,false> get_gradient() const {\n      Array<rank,Type,false> gradient;\n      get_gradient(gradient);\n      return gradient;\n    }\n\n    void\n    put(std::vector<typename internal::active_scalar<Type,IsActive>::type>& data) const {\n      ADEPT_STATIC_ASSERT(rank == 1, PUT_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS);\n      if (data.size() != J0) {\n\tdata.resize(J0);\n      }\n      for (Index i = 0; i < J0; ++i) {\n\tdata[i] = (*this)(i);\n      }  \n    }\n\n    void\n    get(const std::vector<typename internal::active_scalar<Type,IsActive>::type>& data) {\n      ADEPT_STATIC_ASSERT(rank == 1, GET_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS);\n      if (data.size() != J0) {\n\tresize(data.size());\n      }\n      for (Index i = 0; i < J0; ++i) {\n\t(*this)(i) = data[i];\n      }  \n    }\n\n\n    // -------------------------------------------------------------------\n    // FixedArray: 6. 
Member functions accessed by the Expression class\n    // -------------------------------------------------------------------\n\n    template <int MyArrayNum, int NArrays>\n    void set_location_(const ExpressionSize<rank>& i, \n\t\t       ExpressionSize<NArrays>& index) const {\n      index[MyArrayNum] = index_(i);\n    }\n    \n    template <int MyArrayNum, int NArrays>\n    Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return data_[loc[MyArrayNum]];\n    }\n    template <int MyArrayNum, int NArrays>\n    Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return Packet<Type>(data_+loc[MyArrayNum]);\n    }\n\n    Type& lvalue_at_location(const Index& loc) {\n      return data_[loc];\n    }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t  internal::ScratchVector<NScratch>& scratch) const {\n      return data_[loc[MyArrayNum]];\n\n    }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const internal::ScratchVector<NScratch>& scratch) const {\n      return data_[loc[MyArrayNum]];\n    }\n\n    template <int MyArrayNum, int NArrays>\n    void advance_location_(ExpressionSize<NArrays>& loc) const {\n      loc[MyArrayNum] += offset_<rank-1>::value;\n    }\n\n    // If an expression leads to calc_gradient being called on an\n    // active object, we push the multiplier and the gradient index on\n    // to the operation stack (or 1.0 if no multiplier is specified\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch) const {\n      stack.push_rhs(1.0, gradient_index() + loc[MyArrayNum]);\n    }\n    template <int MyArrayNum, int MyScratchNum, int 
NArrays, int NScratch, typename MyType>\n    void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch,\n\t\t\tconst MyType& multiplier) const {\n      stack.push_rhs(multiplier, gradient_index() + loc[MyArrayNum]);\n    }\n  \n\n\n    // -------------------------------------------------------------------\n    // FixedArray: 7. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n\n    // Return the memory index (relative to data_) for array element\n    // indicated by j\n    Index index_(Index j[rank]) const {\n      Index o = 0;\n      ExpressionSize<rank> offs = offset();\n      for (int i = 0; i < rank; i++) {\n\to += j[i]*offs[i];\n      }\n      return o;\n    }\n    Index index_(const ExpressionSize<rank>& j) const {\n      Index o = 0;\n      for (int i = 0; i < rank; i++) {\n\to += j[i]*offset(i);\n      }\n      return o;\n    }\n\n    // Used in traversing through an array\n    void advance_index(Index& index, int& my_rank, ExpressionSize<rank>& i) const {\n      index -= offset_<rank-1>::value*dimension_<rank-1>::value;\n      my_rank = rank-1;\n      while (--my_rank >= 0) {\n\tif (++i[my_rank] >= dimension(my_rank)) {\n\t  i[my_rank] = 0;\n\t  index -= offset(my_rank)*(dimension(my_rank)-1);\n\t}\n\telse {\n\t  index += offset(my_rank);\n\t  break;\n\t}\n      }\n    }\n\n    // When assigning a scalar to a whole array, there may be\n    // advantage in specialist behaviour depending on the rank of the\n    // array. 
This is a generic one that copies the number but treats\n    // the present array as passive.\n    template <int LocalRank, bool LocalIsActive, typename X>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_inactive_scalar_(X x) {\n      ExpressionSize<LocalRank> i(0);\n      Index index = 0;\n      int my_rank;\n      do {\n\t// Innermost loop - note that the counter is index, not max_index\n\tfor (Index max_index = index + dimension_<LocalRank-1>::value*offset_<LocalRank-1>::value;\n\t     index < max_index;\n\t     index += offset_<LocalRank-1>::value) {\n\t  data_[index] = x;\n\t}\n\t// Increment counters appropriately depending on which\n\t// dimensions have been finished\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    // An active array being assigned the value of an inactive scalar\n    template <int LocalRank, bool LocalIsActive, typename X>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_inactive_scalar_(X x) {\n      // If not recording we call the inactive version instead\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (! 
ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_inactive_scalar_<LocalRank, false, X>(x);\n\treturn;\n      }\n#endif\n\n      ExpressionSize<LocalRank> i(0);\n      Index gradient_ind = gradient_index();\n      Index index = 0;\n      int my_rank;\n      do {\n\t// Innermost loop\n\tADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimension_<LocalRank-1>::value,\n\t\t\t\t\t   offset_<LocalRank-1>::value);\n\tfor (Index max_index = index + dimension_<LocalRank-1>::value*offset_<LocalRank-1>::value;\n\t     index < max_index; index += offset_<LocalRank-1>::value) {\n\t  data_[index] = x;\n\t}\n\n\t// Increment counters appropriately depending on which\n\t// dimensions have been finished\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    // When copying an expression to a whole array, there may be\n    // advantage in specialist behaviour depending on the rank of the\n    // array\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_expression_(const E& rhs) {\n      ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY);\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = LocalRank-1;\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimension_<LocalRank-1>::value; ++i[last],\n\t\tindex += offset_<LocalRank-1>::value) {\n\t  data_[index] = rhs.next_value(ind);\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    typename internal::enable_if<LocalIsActive && EIsActive,void>::type\n    assign_expression_(const E& rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if 
(!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_expression_<LocalRank,false,false>(rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = LocalRank-1;\n\n      ADEPT_ACTIVE_STACK->check_space(internal::expr_cast<E>::n_active * size());\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimension_<LocalRank-1>::value; ++i[last],\n\t\tindex += offset_<LocalRank-1>::value) {\n\t  data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, ind);\n\t  ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active?\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    template<int LocalRank, bool LocalIsActive, bool EIsActive, class E>\n    typename internal::enable_if<LocalIsActive && !EIsActive,void>::type\n    assign_expression_(const E& rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_expression_<LocalRank,false,false>(rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<LocalRank> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index index = 0;\n      int my_rank;\n      Index gradient_ind = gradient_index();\n      static const int last = LocalRank-1;\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, ind);\n\t// Innermost loop\n\tADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimension_<LocalRank-1>::value,\n\t\t\t\t\t   offset_<LocalRank-1>::value);\n\tfor ( ; i[last] < dimension_<LocalRank-1>::value; ++i[last],\n\t\tindex += offset_<LocalRank-1>::value) {\n\t  data_[index] = rhs.next_value(ind);\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n\n    template<bool LocalIsActive, class B, typename C>\n    typename 
internal::enable_if<!LocalIsActive,void>::type\n    assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) {\n      ExpressionSize<rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = rank-1;\n\n      do {\n\ti[last] = 0;\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimension_<rank-1>::value; ++i[last],\n\t\tindex += offset_<rank-1>::value) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    data_[index] = rhs;\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    template<bool LocalIsActive, class B, typename C>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (! ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_conditional_inactive_scalar_<false, B, C>(bool_expr, rhs);\n\treturn;\n      }\n#endif\n\n      ExpressionSize<rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = rank-1;\n\n      do {\n\ti[last] = 0;\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimension_<rank-1>::value; ++i[last],\n\t\tindex += offset_<rank-1>::value) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    data_[index] = rhs;\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index);\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n    template<bool LocalIsActive, class B, class C>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_conditional_(const B& bool_expr, const C& rhs) {\n      ExpressionSize<rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      ExpressionSize<internal::expr_cast<C>::n_arrays> rhs_ind(0);\n      Index index = 0;\n      int 
my_rank;\n      static const int last = rank-1;\n      bool is_gap = false;\n\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, rhs_ind);\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimension_<rank-1>::value; ++i[last],\n\t\tindex += offset_<rank-1>::value) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    if (is_gap) {\n\t      rhs.set_location(i, rhs_ind);\n\t      is_gap = false;\n\t    }\n\t    data_[index] = rhs.next_value(rhs_ind);\n\t  }\n\t  else {\n\t    is_gap = true;\n\t  }\n\t}\n\tadvance_index(index, my_rank, i);\n      } while (my_rank >= 0);\n    }\n\n\n    template<bool LocalIsActive, class B, class C>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_conditional_(const B& bool_expr, const C& rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_conditional_<false>(bool_expr, rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<rank> i(0);\n      ExpressionSize<internal::expr_cast<B>::n_arrays> bool_ind(0);\n      ExpressionSize<internal::expr_cast<C>::n_arrays> rhs_ind(0);\n      Index index = 0;\n      int my_rank;\n      static const int last = rank-1;\n      bool is_gap = false;\n\n      ADEPT_ACTIVE_STACK->check_space(internal::expr_cast<C>::n_active * size());\n      do {\n\ti[last] = 0;\n\trhs.set_location(i, rhs_ind);\n\tbool_expr.set_location(i, bool_ind);\n\t// Innermost loop\n\tfor ( ; i[last] < dimension_<rank-1>::value; ++i[last],\n\t\tindex += offset_<rank-1>::value) {\n\t  if (bool_expr.next_value(bool_ind)) {\n\t    if (is_gap) {\n\t      rhs.set_location(i, rhs_ind);\n\t      is_gap = false;\n\t    }\n\t    data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, rhs_ind);\n\t    ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active?\n\t  }\n\t  else {\n\t    is_gap = true;\n\t  }\n\t}\n\tadvance_index(index, my_rank, 
i);\n      } while (my_rank >= 0);\n    }\n\n\n    // -------------------------------------------------------------------\n    // FixedArray: 8. Data\n    // -------------------------------------------------------------------\n  protected:\n    Type data_[length_]; // Stored on the stack\n\n  }; // End of FixedArray class\n\n\n  // -------------------------------------------------------------------\n  // Helper functions\n  // -------------------------------------------------------------------\n\n  // Print array on a stream\n  template <typename Type, bool IsActive, Index J0,Index J1,\n\t    Index J2,Index J3,Index J4,Index J5,Index J6>\n  inline\n  std::ostream&\n  operator<<(std::ostream& os, const FixedArray<Type,IsActive,J0,J1,J2,J3,J4,J5,J6>& A) {\n    const Array<internal::fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank,Type,IsActive> B = A; // link to original data\n    return B.print(os);\n  }\n\n\n  // Extract inactive part of array, working correctly depending on\n  // whether argument is active or inactive\n  template <typename Type, Index J0,Index J1,Index J2,Index J3,\n\t   Index J4,Index J5,Index J6>\n  inline\n  FixedArray<Type, false,J0,J1,J2,J3,J4,J5,J6>&\n  value(FixedArray<Type, false,J0,J1,J2,J3,J4,J5,J6>& expr) {\n    return expr;\n  }\n  template <typename Type, Index J0,Index J1,Index J2, Index J3,\n\t   Index J4,Index J5,Index J6>\n  inline\n  FixedArray<Type, false,J0,J1,J2,J3,J4,J5,J6>\n  value(FixedArray<Type, true,J0,J1,J2,J3,J4,J5,J6>& expr) {\n    return expr.inactive_link();\n  }\n\n  // -------------------------------------------------------------------\n  // Transpose function\n  // -------------------------------------------------------------------\n\n  // Transpose 2D array\n  template<typename Type, bool IsActive, Index J0, Index J1>\n  inline\n  Array<2,Type,IsActive>\n  transpose(FixedArray<Type,IsActive,J0,J1>& in) {\n    // Create output array initially as link to input array \n    Array<2,Type,IsActive> out(in);\n    // Swap 
dimensions\n    return out.in_place_transpose();\n  }\n\n  // Extract the gradients from an active FixedArray after the\n  // Stack::forward or Stack::reverse functions have been called\n  template<typename Type, typename dType, Index J0, Index J1,\n\t   Index J2, Index J3, Index J4, Index J5, Index J6>\n  inline\n  void get_gradients(const FixedArray<Type,true,J0,J1,J2,J3,J4,J5,J6>& a,\n\t\t     FixedArray<dType,false,J0,J1,J2,J3,J4,J5,J6>& data)\n  {\n    data = a.get_gradient();\n  }\n\n  template <typename T, bool IsActive, typename E, Index J0, \n\t    Index J1, Index J2, Index J3, Index J4, Index J5, Index J6>\n  internal::Allocator<internal::fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank,\n\t\t      FixedArray<T,IsActive,J0,J1,J2,J3,J4,J5,J6> > \n  operator<<(FixedArray<T,IsActive,J0,J1,J2,J3,J4,J5,J6>& array, const E& x) {\n    return internal::Allocator<internal::fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank,\n      FixedArray<T,IsActive,J0,J1,J2,J3,J4,J5,J6> >(array, x);\n  }\n\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/GradientIndex.h",
    "content": "\n\n#ifndef AdeptGradientIndex_H\n#define AdeptGradientIndex_H 1\n\n#include <adept/Stack.h>\n\nnamespace adept {\n  namespace internal {\n\n    // Arrays inherit from this class to provide optional storage of\n    // the gradient index of the first value of the array depending on\n    // whether the array is active or not\n    template <bool IsActive>\n    struct GradientIndex {\n      // Constructor used when linking to existing data where gradient\n      // index is known\n      GradientIndex(Index val = -9999) : value_(val) { }\n      // Constructor used for fixed array objects where length is\n      // known\n      GradientIndex(Index n, bool) : value_(ADEPT_ACTIVE_STACK->register_gradients(n)) { }\n      GradientIndex(Index val, Index offset) : value_(val+offset) { }\n      Index get() const { return value_; }\n      void set(Index val) { value_ = val; }\n      void clear() { value_ = -9999; }\n      template <typename Type>\n      void set(const Type* data, const Storage<Type>* storage) {\n\tvalue_ = (storage->gradient_index() + (data - storage->data()));\n      }\n      void assert_inactive() {\n\tthrow invalid_operation(\"Operation applied that is invalid with active arrays\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      void unregister(Index n) { ADEPT_ACTIVE_STACK->unregister_gradients(value_, n); }\n#ifdef ADEPT_MOVE_SEMANTICS\n      void swap_value(GradientIndex& rhs) noexcept {\n\tIndex tmp_value = rhs.get();\n\trhs.set(value_);\n\tvalue_ = tmp_value;\n      }\n#endif\n    private:\n      Index value_;\n    };\n\n    template <>\n    struct GradientIndex<false> {\n      GradientIndex(Index val = -9999) { }\n      GradientIndex(Index, bool) { }\n      GradientIndex(Index val, Index offset) { }\n      Index get() const { return -9999; }\n      void set(Index val) { }\n      void clear() { }\n      template <typename Type>\n      void set(const Type* data, const Storage<Type>* storage) { }\n      void assert_inactive() { }\n      
void unregister(Index) { }\n#ifdef ADEPT_MOVE_SEMANTICS\n      void swap_value(GradientIndex& rhs) noexcept { }\n#endif\n    };\n\n  };\n};\n\n#endif\n"
  },
  {
    "path": "include/adept/IndexedArray.h",
    "content": "/* IndexedArray.h -- Support for indexed arrays\n\n    Copyright (C) 2015-2018 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n  \n   If an Array is indexed via A(i,j,...) then there are three possible\n   return values: (1) a scalar, if all indices are scalar integers\n   (including 0-rank expressions such as \"end\"); (2) an Array that\n   links to a subset of the data in the original Array, if one or more\n   of the indices is a RangeIndex object and all the rest are scalar\n   integers; and (3) an IndexedArray object, if one or more of the\n   indices is a vector of integers.  All of these return values can be\n   used on the left-hand-side of an expression.\n\n   This file treats the last case.  The code is quite complex because\n   the rank of the IndexedArray may be reduced compared to the\n   original Array, since dimensions indexed by scalar integers are\n   removed in IndexedArray.\n\n*/\n\n\n#ifndef AdeptIndexedArray_H\n#define AdeptIndexedArray_H 1\n\n#include <vector>\n\n#include <adept/Expression.h>\n\nnamespace adept {\n\n  // ---------------------------------------------------------------------\n  // Section 0: Forward declarations \n  // ---------------------------------------------------------------------\n  \n  template <int Rank, typename Type, bool IsActive> class Array;\n\n  \n  namespace internal {\n    \n    // ---------------------------------------------------------------------\n    // Section 1. get_size_with_len\n    // ---------------------------------------------------------------------\n    // Return the size of an index to an individual dimension, with\n    // specializations for the different types of index. 
The second\n    // argument passes in the length of the dimension being indexed;\n    // that way if any of the indices are expressions containing\n    // \"end\", this will be replaced by that dimension length minus 1.\n\n    // A scalar integer and rank-0 expression have a size of unity\n    inline\n    Index get_size_with_len(const Index& j, const Index&) { return 1; }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && E::rank == 0, Index>::type\n    get_size_with_len(const Expression<T,E>&, const Index& len) { return 1; }\n\n    // Extract the length of an IntVector\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && E::rank == 1 && !is_range<E>::value, Index>::type\n    get_size_with_len(const Expression<T,E>& e, const Index& len) { \n      ExpressionSize<1> s;\n      e.get_dimensions(s);\n      return s[0];\n    }\n\n    // Extract the length of a RangeIndex object, which might be\n    // dependent on len if \"end\" is present\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && is_range<E>::value, Index>::type\n    get_size_with_len(const Expression<T,E>& e, const Index& len) { \n      return e.cast().size_with_len_(len);\n    }\n\n    // Allow std::vector to be used to index Arrays\n    template <typename T>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer, Index>::type\n    get_size_with_len(const std::vector<T>& v, const Index&) { \n      return v.size();\n    }\n\n\n    // ---------------------------------------------------------------------\n    // Section 2. 
get_value_with_len\n    // ---------------------------------------------------------------------\n    // Return the j'th value of index ind.\n\n#ifndef ADEPT_BOUNDS_CHECKING\n    // For scalar indices there is only one value to return - j ought\n    // to be zero but we don't check this\n    inline\n    Index get_value_with_len(const Index& ind, const Index& j, const Index&)\n    { return ind; }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && (E::rank < 2), Index>::type\n    get_value_with_len(const Expression<T,E>& ind, const Index& j, \n\t\t       const Index& len) {\n      return ind.value_with_len(j, len); \n    }\n\n    template <typename T>\n    inline\n    Index get_value_with_len(const std::vector<T>& ind, const Index& j, \n\t\t\t     const Index&) { \n      return ind[j];\n    }\n#else\n    // For scalar indices there is only one value to return - j ought\n    // to be zero but we don't check this\n    inline\n    Index get_value_with_len(const Index& ind, const Index& j, const Index& len)   { \n      if (j != 0) {\n\tthrow index_out_of_bounds(\"Index to IndexedArray is out of bounds\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (ind < 0 || ind >= len) {\n\tthrow index_out_of_bounds(\"Scalar index out of bounds in IndexedArray\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\treturn ind; \n      }\n    }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && (E::rank < 2), Index>::type\n    get_value_with_len(const Expression<T,E>& ind, const Index& j, \n\t\t       const Index& len) {\n      Index i = ind.value_with_len(j, len);\n      if (i < 0 || i >= len) {\n\tthrow index_out_of_bounds(\"Index out of bounds in IndexedArray\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\treturn i;\n      }\n    }\n\n    template <typename T>\n    inline\n    Index 
get_value_with_len(const std::vector<T>& ind, const Index& j, \n\t\t\t     const Index& len) {\n      Index i = ind[j];\n      if (i < 0 || i >= len) {\n\tthrow index_out_of_bounds(\"Index from std::vector out of bounds in IndexedArray\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\treturn i;\n      }    \n    }\n#endif\n\n    // ---------------------------------------------------------------------\n    // Section 3. is_int_vector\n    // ---------------------------------------------------------------------\n    // is_int_vector<Type>::value is \"true\" if Type is a rank-1\n    // integer expression (including RangeIndex objects), false\n    // otherwise.\n\n    template <typename T, class Enable = void>\n    struct is_int_vector { };\n\n    template <typename T>\n    struct is_int_vector<T,\n\t typename enable_if<is_not_expression<T>::value>::type>\n    { static const bool value = false; };\n\n    template <typename T>\n    struct is_int_vector<T,\n       typename enable_if<!is_not_expression<T>::value>::type>\n    {\n      static const bool value \n      = std::numeric_limits<typename T::type>::is_integer\n\t&& expr_cast<T>::rank == 1;\n    };\n    \n    template <typename T>\n    struct is_index {\n      static const bool value = is_regular_index<T>::value \n\t|| is_int_vector<T>::value;\n      static const int count = value;\n    };\n\n    template <typename T>\n    struct is_irregular_index {\n      static const bool value = !is_range<T>::value \n\t&& is_int_vector<T>::value;\n      static const int count = value;\n    };\n    \n    \n    // ---------------------------------------------------------------------\n    // Section 4. 
is_irregular_index\n    // ---------------------------------------------------------------------\n\n    // is_irregular_index<Rank,I0,I1,...>::value is \"true\" if indices\n    // I0 to I[Rank-1] contains at least one integer vector that could\n    // be irregularly spaced, and all the other are valid indices.\n    // The ::count member gives the number of non-scalar indices,\n    // which is the rank of the IndexedArray objects resulting from\n    // indexing an Array of the specified Rank with indices I0 to\n    // I[Rank-1].\n    template <int Rank, typename I0, typename I1 = Index, \n\t      typename I2 = Index, typename I3 = Index,\n\t      typename I4 = Index, typename I5 = Index,\n\t      typename I6 = Index>\n    struct is_irreg_indexed {\n      static const bool value\n        = (   is_irregular_index<I0>::value || is_irregular_index<I1>::value\n\t   || is_irregular_index<I2>::value || is_irregular_index<I3>::value\n\t   || is_irregular_index<I4>::value || is_irregular_index<I5>::value\n\t   || is_irregular_index<I6>::value)\n\t&& (   is_index<I0>::value && is_index<I1>::value\n\t    && is_index<I2>::value && is_index<I3>::value\n\t    && is_index<I4>::value && is_index<I5>::value\n\t    && is_index<I6>::value);\n      static const int count \n         = 7 - (  is_scalar_int<I0>::count + is_scalar_int<I1>::count\n\t\t+ is_scalar_int<I2>::count + is_scalar_int<I3>::count\n\t\t+ is_scalar_int<I4>::count + is_scalar_int<I5>::count\n\t\t+ is_scalar_int<I6>::count);\n    };\n    \n\n    // ---------------------------------------------------------------------\n    // Section 5. IndexedArray class\n    // ---------------------------------------------------------------------\n    // A class holding references to an Array to be indexed, plus\n    // references to the objects corresponding to each of its\n    // dimension being indexed.  IndexedArray objects are temporary,\n    // generated by indexing an Array object \"A\" via A(i,j,...) 
within\n    // an expression.  The indices themselves may be temporary results\n    // of integer expressions, but by C++ rules they will not be\n    // deleted until the full expression is complete.\n    template <int Rank, typename Type, bool IsActive, \n\t      class ArrayType, class I0, \n\t      class I1 = Index, class I2 = Index, \n\t      class I3 = Index, class I4 = Index, \n\t      class I5 = Index, class I6 = Index>\n    class IndexedArray : public Expression<Type, \n\t\t   IndexedArray<Rank, Type, IsActive, ArrayType, \n\t\t\t\tI0, I1, I2, I3, I4, I5, I6> > {\n    public:\n      // ---------------------------------------------------------------------\n      // Section 5.1. IndexedArray: Static definitions\n      // ---------------------------------------------------------------------\n      static const int  rank       = Rank;\n      static const int  n_scratch  = 1;\n      static const int  n_active   = IsActive;\n\n      // We require three indices to be stored to optimize the\n      // calculation of the location: first the location of the start\n      // of the row, second the index to i[Rank-1] (0, 1, 2...), and \n      // third the location passed to the Array\n      static const int  n_arrays   = 3;\n      static const bool is_active  = IsActive;\n\n      // The rank of the array being indexed may be higher than the\n      // result of the index due to singleton indices\n      // (e.g. Matrix(IntVector,int) has rank 1 even though Matrix has\n      // rank 2).\n      static const int  a_rank      = ArrayType::rank;\n\n\n      // ---------------------------------------------------------------------\n      // Section 5.2. 
IndexedArray: Constructors\n      // ---------------------------------------------------------------------\n      // Make default constructor that the compiler might generate\n      // itself unreachable\n    private:\n      IndexedArray() { }\n\n    public:\n      // The constructor sets all unused indices to an integer of zero\n      IndexedArray(ArrayType& a, const I0& i0,\n\t\t   const I1& i1 = 0, const I2& i2 = 0,\n\t\t   const I3& i3 = 0, const I4& i4 = 0,\n\t\t   const I5& i5 = 0, const I6& i6 = 0)\n\t: a_(a), i0_(i0), i1_(i1), i2_(i2), i3_(i3), \n\t  i4_(i4), i5_(i5), i6_(i6), a_dims_(a.dimensions())\n      {\n\t// Compute the dimensions of the IndexedArray objects from the\n\t// lengths of the non-singleton indices to Array\n\tset_dimensions_<0,0>(); \n\n\t// For stepping through memory efficiently in the inner loop,\n\t// we store the distance between elements in the fastest\n\t// varying dimension in Array\n\tlast_offset_ = a.offset()[a_fastest_varying_dim];\n      }\n\n      // ---------------------------------------------------------------------\n      // Section 5.3. 
IndexedArray: Functions facilitating Expression functionality\n      // ---------------------------------------------------------------------\n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\tdim = dimensions_;\n\treturn true;\n      }\n      \n      std::string info_string() const {\n\tstd::stringstream s;\n\ts << expression_string_() \n\t  << \", array-dim=\" << a_dims_ << \", dim=\" << dimensions_\n\t  << \", last-offset_=\" << last_offset_;\n\treturn s.str();\t\n      }\n\n      std::string expression_string_() const {\n\tstd::string str;\n\tstr = a_.expression_string() + \"(\";\n\tstr += expr_string(i0_);\n\tif (a_rank > 1) {\n\t  str += std::string(\",\") + expr_string(i1_);\n\t  if (a_rank > 2) {\n\t    str += std::string(\",\") + expr_string(i2_);\n\t    if (a_rank > 3) {\n\t      str += std::string(\",\") + expr_string(i3_);\n\t      if (a_rank > 4) {\n\t\tstr += std::string(\",\") + expr_string(i4_);\n\t\tif (a_rank > 5) {\n\t\t  str += std::string(\",\") + expr_string(i5_);\n\t\t  if (a_rank > 6) {\n\t\t    str += std::string(\",\") + expr_string(i6_);\n\t\t  }\n\t\t}\n\t      }\n\t    }\n\t  }\n\t}\n\tstr += \")\";\n\treturn str;\n      }\n     \n    protected:\n      // Helper functions for expression_string()\n      template <typename T, typename E>\n      std::string expr_string(const Expression<T,E>& e) const {\n\treturn e.expression_string();\n      }\n      template <typename T>\n      typename enable_if<is_not_expression<T>::value, std::string>::type\n      expr_string(const T& e) const {\n\tstd::stringstream s;\n\ts << e;\n\treturn s.str();\n      }\n\n    public:\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn a_.is_aliased(mem1, mem2);\n      }\n\n      Type value_with_len_(const Index& i, const Index& len) const {\n\t// Treat as one dimensional\n\treturn a_(get_value_with_len_<Rank-1>(i));\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      void set_location_(const 
ExpressionSize<Rank>& coords,\n\t\t\t ExpressionSize<NArrays>& loc) const {\n\tExpressionSize<a_rank> a_coords;\n\ttranslate_coords_<0,0>(coords, a_coords);\n\t// Location of start of most rapidly varying dimension in\n\t// Array\n\ta_.template set_location_<MyArrayNum>(a_coords, loc);\n\t// Index to most rapidly varying dimension in IndexedArray\n\tloc[MyArrayNum+1] = coords[Rank-1];\n\tloc[MyArrayNum+2] = loc[MyArrayNum] + last_offset_\n\t  * get_value_with_len_<a_fastest_varying_dim>(loc[MyArrayNum+1]);\n      }\n\n      // Advance the location of each array in the expression\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\t++loc[MyArrayNum+1];\n\t// Note that next_value calls advance_location even when it\n\t// has reached the end of a row, in which case finding the\n\t// location of an indexed array is an invalid operation since\n\t// it would require accessing the indexing array out of\n\t// bounds. Hence the \"if\" test here.\n\tif (loc[MyArrayNum+1] < dimensions_[Rank-1]) {\n\t  loc[MyArrayNum+2] = loc[MyArrayNum] + last_offset_\n\t    * get_value_with_len_<a_fastest_varying_dim>(loc[MyArrayNum+1]);\n\t}\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn a_.template value_at_location_<MyArrayNum+2>(loc);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\tADEPT_STATIC_ASSERT(ArrayType::n_scratch == 0,\n\t\t\t    ASSUMING_ARRAY_N_SCRATCH_IS_ZERO);\n\treturn (scratch[MyScratchNum] \n\t\t= a_.template value_at_location_<MyArrayNum+2>(loc));\n      }\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const 
{\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\ta_.template calc_gradient_<MyArrayNum+2,MyScratchNum+1>(stack, loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch,\n\t\ttypename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n\ta_.template calc_gradient_<MyArrayNum+2, MyScratchNum+1>(stack, loc, \n\t\t\t\t\t\t\t\t scratch, multiplier);\n      }\n\n\n      // ---------------------------------------------------------------------\n      // Section 5.4. IndexedArray: Operators\n      // ---------------------------------------------------------------------\n      // Operators so that IndexedArray can appear on the\n      // left-hand-side of a statement\n      IndexedArray& operator=(const IndexedArray& src) {\n\t*this = static_cast<const Expression<Type,IndexedArray>&>(src);\n\treturn *this;\n      }\n\n      // Assignment to a single value copies to every element\n      template <typename RType>\n      typename enable_if<is_not_expression<RType>::value, IndexedArray&>::type\n      operator=(RType rhs) {\n\tif (!empty()) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n\t  if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\t    assign_inactive_scalar_<IsActive>(rhs);\n#ifdef ADEPT_RECORDING_PAUSABLE\n\t  }\n\t  else {\n\t    assign_inactive_scalar_<false>(rhs);\n\t  }\n#endif\n\t}\n\treturn *this;\n      }\n\n    public:\n      // Assignment to an array expression of the same rank\n      template <typename EType, class E>\n      typename enable_if<E::rank == Rank, IndexedArray&>::type\n      operator=(const Expression<EType,E>& rhs) {\n      // Definition moved to Array.h due to 
its dependence on the\n      // Array class\n\tExpressionSize<Rank> dims;\n\tif (!rhs.get_dimensions(dims)) {\n\t  std::string str = \"Array size mismatch in \"\n\t    + rhs.expression_string() + \".\";\n\t  throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n\t}\n\telse if (!compatible(dims, dimensions_)) {\n\t  std::string str = \"Expr\";\n\t  str += dims.str() + \" object assigned to \" + expression_string_();\n\t  throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n\t}\n\n\tif (!empty()) {\n#ifndef ADEPT_NO_ALIAS_CHECKING\n\t  // Check for aliasing first\n\t  Type const * ptr_begin;\n\t  Type const * ptr_end;\n\t  a_.data_range(ptr_begin, ptr_end);\n\t  if (rhs.is_aliased(ptr_begin, ptr_end)) {\n\t    Array<Rank,Type,IsActive> copy;\n\t    copy = noalias(rhs);\n\t    assign_expression_<IsActive, E::is_active>(copy);\n\t  }\n\t  else {\n#endif\n\t    assign_expression_<IsActive, E::is_active>(rhs);\n#ifndef ADEPT_NO_ALIAS_CHECKING\n\t  }\n#endif\n\t}\n\treturn *this;\n      }\n\n\n      // Assign active scalar expression to an active array by first\n      // converting the RHS to an active scalar\n      template <typename EType, class E>\n      typename enable_if<E::rank == 0 && (Rank > 0)\n\t                 && IsActive && !E::is_lvalue,\n\tIndexedArray&>::type\n      operator=(const Expression<EType,E>& rhs) {\n\tActive<EType> x = rhs;\n\t*this = x;\n\treturn *this;\n      }\n\n      // Assign an active scalar to an active array\n      template <typename PType>\n      typename enable_if<!internal::is_active<PType>::value && IsActive, IndexedArray&>::type\n      operator=(const Active<PType>& rhs) {\n\tADEPT_STATIC_ASSERT(IsActive, ATTEMPT_TO_ASSIGN_ACTIVE_SCALAR_TO_INACTIVE_INDEXED_ARRAY);\n\tif (!empty()) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n\t  if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\t    assign_inactive_scalar_<false>(rhs.scalar_value());\n\t    return *this;\n\t  }\n#endif\n\t  \n\t  ExpressionSize<Rank> coords(0);\n\t  ExpressionSize<a_rank> 
a_coords(0);\n\t  ExpressionSize<1> a_loc(0);\n\t  Type val = rhs.scalar_value();\n\t  int dim;\n\t  static const int last = Rank-1;\n\t  do {\n \t    coords[last] = 0;\n\t    // Convert between the coordinates of the IndexedArray\n\t    // object to the coordinates of the Array object\n\t    translate_coords_<0,0>(coords, a_coords);\n\t    a_.set_location(a_coords, a_loc);\n\t    // Innermost loop\n\t    for ( ; coords[last] < dimensions_[last]; ++coords[last]) {\n\t      Index index = a_loc[0]\n\t\t+ last_offset_\n\t\t* get_value_with_len_<a_fastest_varying_dim>(coords[last]);\n\t      a_.data()[index] = val;\n\t      ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index());\n\t      ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index);\n\t    }\n\t    advance_index(dim, coords);\n\t  } while (dim >= 0);\n        }\n        return *this;\n      } \n\n\n#define ADEPT_DEFINE_OPERATOR(OPERATOR, OPSYMBOL)\t\\\n    template <class RType>\t\t\t\\\n    IndexedArray& OPERATOR(const RType& rhs) {\t\\\n    return *this = noalias(*this) OPSYMBOL rhs;\t\\\n    }\n    ADEPT_DEFINE_OPERATOR(operator+=, +)\n    ADEPT_DEFINE_OPERATOR(operator-=, -)\n    ADEPT_DEFINE_OPERATOR(operator*=, *)\n    ADEPT_DEFINE_OPERATOR(operator/=, /)\n    //    ADEPT_DEFINE_OPERATOR(operator&=, &);\n    //    ADEPT_DEFINE_OPERATOR(operator|=, |);\n#undef ADEPT_DEFINE_OPERATOR\n\n#ifdef ADEPT_CXX11_FEATURES\n\n    // To enable assignment to an initializer list we take a simple\n    // but inefficient strategy of creating a temporary Array and\n    // assigning to that\n    template <class IType>\n    IndexedArray& operator=(std::initializer_list<IType> list) {\n      ADEPT_STATIC_ASSERT(Rank==1,RANK_MISMATCH_IN_INITIALIZER_LIST);\n      Array<Rank,Type,false> array = list;\n      return (*this = array);\n    }\n    template <class IType>\n    IndexedArray& operator=(std::initializer_list<\n\t\t\t    std::initializer_list<IType> > list) {\n      
ADEPT_STATIC_ASSERT(Rank==2,RANK_MISMATCH_IN_INITIALIZER_LIST);\n      Array<Rank,Type,false> array = list;\n      return (*this = array);\n    }\n    template <class IType>\n    IndexedArray& operator=(std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<IType> > > list) {\n      ADEPT_STATIC_ASSERT(Rank==3,RANK_MISMATCH_IN_INITIALIZER_LIST);\n      Array<Rank,Type,false> array = list;\n      return (*this = array);\n    }\n    template <class IType>\n    IndexedArray& operator=(std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<IType> > > > list) {\n      ADEPT_STATIC_ASSERT(Rank==4,RANK_MISMATCH_IN_INITIALIZER_LIST);\n      Array<Rank,Type,false> array = list;\n      return (*this = array);\n    }\n    template <class IType>\n    IndexedArray& operator=(std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<IType> > > > > list) {\n      ADEPT_STATIC_ASSERT(Rank==5,RANK_MISMATCH_IN_INITIALIZER_LIST);\n      Array<Rank,Type,false> array = list;\n      return (*this = array);\n    }\n    template <class IType>\n    IndexedArray& operator=(std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<\n\t\t\t    std::initializer_list<IType> > > > > > list) {\n      ADEPT_STATIC_ASSERT(Rank==6,RANK_MISMATCH_IN_INITIALIZER_LIST);\n      Array<Rank,Type,false> array = list;\n      return (*this = array);\n    }\n\n#endif\n\n\n    protected:\n      // ---------------------------------------------------------------------\n      // Section 5.5. 
IndexedArray: Internal functions facilitating operator=\n      // ---------------------------------------------------------------------\n\n      // Two versions of assigning an inactive scalar to an indexed\n      // array depending on whether the indexed array is active -\n      // first the case when it is not\n      template <bool LocalIsActive, typename X>\n      typename enable_if<!LocalIsActive,void>::type\n      assign_inactive_scalar_(X x) {\n\tExpressionSize<Rank> coords(0);\n\tExpressionSize<a_rank> a_coords(0);\n\tExpressionSize<1> a_loc(0);\n\tint dim;\n\tstatic const int last = Rank-1;\n\tdo {\n\t  coords[last] = 0;\n\t  // Convert between the coordinates of the IndexedArray\n\t  // object to the coordinates of the Array object\n\t  translate_coords_<0,0>(coords, a_coords);\n\t  a_.set_location(a_coords, a_loc);\n\t  // Innermost loop\n\t  for ( ; coords[last] < dimensions_[last]; ++coords[last]) {\n\t    a_.data()[a_loc[0]\n\t\t      + last_offset_\n\t\t      * get_value_with_len_<a_fastest_varying_dim>(coords[last])]\n\t      = x;\n\t  }\n\t  advance_index(dim, coords);\n\t} while (dim >= 0);\n      }\n\n      // Active version of assigning an inactive scalar\n      template <bool LocalIsActive, typename X>\n      typename enable_if<LocalIsActive,void>::type\n      assign_inactive_scalar_(X x) {\n\t// If not recording we call the inactive version instead\n#ifdef ADEPT_RECORDING_PAUSABLE\n\tif (!ADEPT_ACTIVE_STACK->is_recording()) {\n\t  assign_inactive_scalar_<false, X>(x);\n\t  return;\n\t}\n#endif\n\tExpressionSize<Rank> coords(0);\n\tExpressionSize<a_rank> a_coords(0);\n\tExpressionSize<1> a_loc(0);\n\tint dim;\n\tstatic const int last = Rank-1;\n\tdo {\n\t  coords[last] = 0;\n\t  // Convert between the coordinates of the IndexedArray\n\t  // object to the coordinates of the Array object\n\t  translate_coords_<0,0>(coords, a_coords);\n\t  a_.set_location(a_coords, a_loc);\n\t  // Innermost loop\n\t  for ( ; coords[last] < dimensions_[last]; 
++coords[last]) {\n\t    Index index = a_loc[0]\n\t      + last_offset_\n\t      * get_value_with_len_<a_fastest_varying_dim>(coords[last]);\n\t    a_.data()[index] = x;\n\t    ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index);\n\t  }\n\t  advance_index(dim, coords);\n\t} while (dim >= 0);\n      }\n      \n\n      // Assign expression has two versions, passive and active\n      template<bool LeftIsActive, bool RightIsActive, class E>\n      typename enable_if<!LeftIsActive,void>::type\n      assign_expression_(const E& rhs) {\n\tADEPT_STATIC_ASSERT(!RightIsActive, \n\t\t    CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_INDEXED_ARRAY);\n\tExpressionSize<Rank> coords(0);\n\tExpressionSize<a_rank> a_coords(0);\n\tExpressionSize<expr_cast<E>::n_arrays> loc(0);\n\tExpressionSize<1> a_loc(0);\n\tint dim;\n\tstatic const int last = Rank-1;\n\tdo {\n\t  coords[last] = 0;\n\t  rhs.set_location(coords, loc);\n\t  // Convert between the coordinates of the IndexedArray\n\t  // object to the coordinates of the Array object\n\t  translate_coords_<0,0>(coords, a_coords);\n\t  a_.set_location(a_coords, a_loc);\n\t  // Innermost loop\n\t  for ( ; coords[last] < dimensions_[last]; ++coords[last]) {\n\t    a_.data()[a_loc[0]\n\t\t      + last_offset_\n\t\t      * get_value_with_len_<a_fastest_varying_dim>(coords[last])]\n\t      = rhs.next_value(loc);\n\t  }\n\t  advance_index(dim, coords);\n\t} while (dim >= 0);\n      }\n\n      // Active LHS, passive RHS\n      template<bool LeftIsActive, bool RightIsActive, class E>\n      typename enable_if<LeftIsActive && !RightIsActive,void>::type\n      assign_expression_(const E& rhs) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n\tif (!ADEPT_ACTIVE_STACK->is_recording()) {\n\t  assign_expression_<false,false>(rhs);\n\t  return;\n\t}\n#endif\n\tExpressionSize<Rank> coords(0);\n\tExpressionSize<a_rank> a_coords(0);\n\tExpressionSize<expr_cast<E>::n_arrays> loc(0);\n\tExpressionSize<1> a_loc(0);\n\tint dim;\n\tstatic const int last = Rank-1;\n\tdo 
{\n\t  coords[last] = 0;\n\t  rhs.set_location(coords, loc);\n\t  // Convert between the coordinates of the IndexedArray\n\t  // object to the coordinates of the Array object\n\t  translate_coords_<0,0>(coords, a_coords);\n\t  a_.set_location(a_coords, a_loc);\n\t  // Innermost loop\n\t  for ( ; coords[last] < dimensions_[last]; ++coords[last]) {\n\t    Index index = a_loc[0]\n\t\t      + last_offset_\n\t      * get_value_with_len_<a_fastest_varying_dim>(coords[last]);\n\t    a_.data()[index] = rhs.next_value(loc);\n\t    ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index);\n\t  }\n\t  advance_index(dim, coords);\n\t} while (dim >= 0);\n      }\n\n      // Active LHS, active RHS\n      template<bool LeftIsActive, bool RightIsActive, class E>\n      typename enable_if<LeftIsActive && RightIsActive,void>::type\n      assign_expression_(const E& rhs) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n\tif (!ADEPT_ACTIVE_STACK->is_recording()) {\n\t  assign_expression_<false,false>(rhs);\n\t  return;\n\t}\n#endif\n\tExpressionSize<Rank> coords(0);\n\tExpressionSize<a_rank> a_coords(0);\n\tExpressionSize<expr_cast<E>::n_arrays> loc(0);\n\tExpressionSize<1> a_loc(0);\n\tint dim;\n\tstatic const int last = Rank-1;\n\n\tADEPT_ACTIVE_STACK->check_space(expr_cast<E>::n_active * dimensions_[0]);\n\tdo {\n\t  coords[last] = 0;\n\t  rhs.set_location(coords, loc);\n\t  // Convert between the coordinates of the IndexedArray\n\t  // object to the coordinates of the Array object\n\t  translate_coords_<0,0>(coords, a_coords);\n\t  a_.set_location(a_coords, a_loc);\n\t  // Innermost loop\n\t  for ( ; coords[last] < dimensions_[last]; ++coords[last]) {\n\t    Index index = a_loc[0]\n\t\t      + last_offset_\n\t      * get_value_with_len_<a_fastest_varying_dim>(coords[last]);\n\t    a_.data()[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc);\n\t    ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index);\n\t  }\n\t  advance_index(dim, coords);\n\t} while (dim >= 0);\n      }\n\n     
 // Move to the start of the next row\n      void advance_index(int& dim, ExpressionSize<Rank>& coords) const {\n\tdim = Rank-1;\n\twhile (--dim >= 0) {\n\t  if (++coords[dim] >= dimensions_[dim]) {\n\t    coords[dim] = 0;\n\t  }\n\t  else {\n\t    break;\n\t  }\n\t}\n      }\n\n\n      bool empty() { return dimensions_[0] == 0; }\n      \n      // Declare I as it is used before it is defined\n      template<int Dim> struct Ix;\n\n      // Translate coordinates in terms of the IndexedArray object in\n      // to coordinates to the Array object it wraps, accounting for\n      // singleton dimensions in Array that are not included in the\n      // dimensions that IndexedArray presents to external objects\n      template <int InDim, int OutDim>\n      typename enable_if<!is_scalar_int<typename Ix<OutDim>::type>::value\n                         && (InDim < Rank-1), void>::type\n      translate_coords_(const ExpressionSize<Rank>& in,\n\t\t       ExpressionSize<a_rank>& out) const {\n\t// Compute the index of the OutDim dimension of Array\n\tout[OutDim] = get_value_with_len(index_object_<OutDim>(),\n\t\t\t\t\t in[InDim],a_dims_[OutDim]);\n\t// Move on to the next dimension\n\ttranslate_coords_<InDim+1,OutDim+1>(in, out);\n      }\n\n      template <int InDim, int OutDim>\n      typename enable_if<(OutDim < a_rank)\n\t                 && is_scalar_int<typename Ix<OutDim>::type>::value,\n\t\t\t void>::type\n      translate_coords_(const ExpressionSize<Rank>& in,\n\t\t        ExpressionSize<a_rank>& out) const {\n\t// This is a singleton dimension so the 0th element is the\n\t// only element\n\tout[OutDim] = get_value_with_len(index_object_<OutDim>(),\n\t\t\t\t\t  0,a_dims_[OutDim]);\n\t// Move on to the next OutDim dimension of Array\n\ttranslate_coords_<InDim,OutDim+1>(in, out);\n      }\n\n      template <int InDim, int OutDim>\n      typename enable_if<!is_scalar_int<typename Ix<OutDim>::type>::value\n                         && InDim == Rank-1, void>::type\n      
translate_coords_(const ExpressionSize<Rank>& in,\n\t\t       ExpressionSize<a_rank>& out) const {\n\t// The final non-singleton dimension is set to zero, since it\n\t// will be incremented later by advance_location\n\tout[OutDim] = 0;\n\t// Do any further dimensions, which must be singletons\n\ttranslate_coords_<InDim+1,OutDim+1>(in, out);\n      }\n\n      // Run out of dimensions: do nothing\n      template <int InDim, int OutDim>\n      typename enable_if<InDim == Rank && OutDim == a_rank, void>::type\n      translate_coords_(const ExpressionSize<Rank>& in,\n\t\t       ExpressionSize<a_rank>& out) const { }\n\n      template <int Dim>\n      Index get_value_with_len_(const Index& j) const {\n\treturn get_value_with_len(index_object_<Dim>(), j, a_dims_[Dim]);\n \t//return get_value_with_len(index_object_<Dim>(), j, dimensions_[Dim]);\n     }\n\n\n      // ---------------------------------------------------------------------\n      // Section 5.6. IndexedArray: Helper functions for the constructor\n      // ---------------------------------------------------------------------\n      // Helper function for translating between the dimensions of the\n      // Array object and that of the IndexedArray, the latter of\n      // which has removed the singleton dimensions of the former\n      template <int InDim, int OutDim>\n      typename enable_if<(OutDim < a_rank)\n\t&& !is_scalar_int<typename Ix<OutDim>::type>::value,void>::type\n      set_dimensions_() {\n\tdimensions_[InDim] = get_size_with_len(index_object_<OutDim>(),\n\t\t\t\t\t      a_dims_[OutDim]);\n\tset_dimensions_<InDim+1, OutDim+1>();\n      }\n      template <int InDim, int OutDim>\n      typename enable_if<(OutDim < a_rank)\n\t&& is_scalar_int<typename Ix<OutDim>::type>::value,void>::type\n      set_dimensions_() {\n\tset_dimensions_<InDim, OutDim+1>();\n      }\n      template <int InDim, int OutDim>\n      typename enable_if<OutDim == a_rank,void>::type\n      set_dimensions_() { }\n\n\n\n      // 
---------------------------------------------------------------------\n      // Section 5.7. IndexedArray: Low-level helper sub-classes and functions\n      // ---------------------------------------------------------------------\n\n      // The individual indices are stored in objects of type I0 to\n      // I[Rank-1].  The following sub-class \"index_alias\" enables the\n      // definition of the sub-class I that is used such that\n      // Ix<Dim>::type returns the type of index \"Dim\" at compile time.\n      template <int Dim,class X0,class X1,class X2,class X3,class X4,\n\t\tclass X5,class X6> struct index_alias { };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<0,X0,X1,X2,X3,X4,X5,X6> { typedef X0 type; };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<1,X0,X1,X2,X3,X4,X5,X6> { typedef X1 type; };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<2,X0,X1,X2,X3,X4,X5,X6> { typedef X2 type; };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<3,X0,X1,X2,X3,X4,X5,X6> { typedef X3 type; };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<4,X0,X1,X2,X3,X4,X5,X6> { typedef X4 type; };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<5,X0,X1,X2,X3,X4,X5,X6> { typedef X5 type; };\n\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct index_alias<6,X0,X1,X2,X3,X4,X5,X6> { typedef X6 type; };\n\n      template<int Dim> struct Ix { \n\ttypedef typename index_alias<Dim,I0,I1,I2,I3,I4,I5,I6>::type type; \n      };\n\n      // Similarly, the following enables us to return not just the\n      // type but also a reference to the actual index object via\n      // 
index_object_<Dim>()\n      template <int Dim> typename enable_if<Dim == 0, const I0&>::type\n      index_object_() const { return i0_; }\n      template <int Dim> typename enable_if<Dim == 1, const I1&>::type\n      index_object_() const { return i1_; }\n      template <int Dim> typename enable_if<Dim == 2, const I2&>::type\n      index_object_() const { return i2_; }\n      template <int Dim> typename enable_if<Dim == 3, const I3&>::type\n      index_object_() const { return i3_; }\n      template <int Dim> typename enable_if<Dim == 4, const I4&>::type\n      index_object_() const { return i4_; }\n      template <int Dim> typename enable_if<Dim == 5, const I5&>::type\n      index_object_() const { return i5_; }\n      template <int Dim> typename enable_if<Dim == 6, const I6&>::type\n      index_object_() const { return i6_; }\n\n      // The following sub-class \"fastest_varying\" enables the\n      // definition of \"a_fastest_varying_dim\" static constant integer\n      // that contains the dimension of Array that varies fastest when\n      // progessing through memory and is not a singleton.  This\n      // corresponds to the dimension \"Rank-1\" of IndexedArray.\n      template<int Dim, class X0,class X1,class X2,\n\t       class X3,class X4,class X5,class X6> \n      struct fastest_varying {\n\tstatic const int value\n\t  = is_scalar_int<typename index_alias<Dim,X0,X1,X2,X3,X4,X5,X6>::type>::value \n\t  ? fastest_varying<Dim-1,X0,X1,X2,X3,X4,X5,X6>::value\n\t  : Dim;\n      };\n      template<class X0,class X1,class X2,class X3,class X4,class X5,class X6> \n      struct fastest_varying<0,X0,X1,X2,X3,X4,X5,X6> {\n\tstatic const int value = 0;\n      };\n\n      static const int a_fastest_varying_dim \n        = fastest_varying<6,I0,I1,I2,I3,I4,I5,I6>::value;\n\n      // ---------------------------------------------------------------------\n      // Section 5.8. 
IndexedArray: Data\n      // ---------------------------------------------------------------------\n      // Reference to the array being indexed\n      ArrayType& a_;\n      // Individual indices to up to seven dimensions\n      const I0& i0_;\n      const I1& i1_;\n      const I2& i2_;\n      const I3& i3_;\n      const I4& i4_;\n      const I5& i5_;\n      const I6& i6_;\n      // Dimensions of the array being indexed (cannot be a reference\n      // because FixedArrays do not store their dimensions explicitly)\n      ExpressionSize<ArrayType::rank> a_dims_;\n      // Dimensions of the IndexedArray\n      ExpressionSize<Rank> dimensions_;\n      // Separation of elements of the array objects in the dimension\n      // that varies fastests\n      Index last_offset_;\n\n    }; // End class IndexedArray\n\n  } // End namespace internal\n  \n} // End namespace adept\n\n#endif \n"
  },
  {
    "path": "include/adept/Minimizer.h",
    "content": "/* Minimizer.h -- class for minimizing the cost function of an optimizable object\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptMinimizer_H\n#define AdeptMinimizer_H 1\n\n#include <adept/Optimizable.h>\n\nnamespace adept {\n\n  enum MinimizerAlgorithm {\n    MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS = 0,\n    MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT,    // Polak-Ribiere\n    MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR, // Fletcher-Reeves\n    MINIMIZER_ALGORITHM_LEVENBERG,\n    MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT,\n    MINIMIZER_ALGORITHM_NUMBER_AVAILABLE\n  };\n\n  enum MinimizerStatus {\n    MINIMIZER_STATUS_SUCCESS = 0,\n    MINIMIZER_STATUS_EMPTY_STATE,\n    MINIMIZER_STATUS_MAX_ITERATIONS_REACHED,\n    MINIMIZER_STATUS_FAILED_TO_CONVERGE,\n    MINIMIZER_STATUS_DIRECTION_UPHILL,\n    MINIMIZER_STATUS_BOUND_REACHED, // Only returned from line-search\n    MINIMIZER_STATUS_INVALID_COST_FUNCTION,\n    MINIMIZER_STATUS_INVALID_GRADIENT,\n    MINIMIZER_STATUS_INVALID_BOUNDS,\n    MINIMIZER_STATUS_NUMBER_AVAILABLE,\n    MINIMIZER_STATUS_NOT_YET_CONVERGED\n  };\n\n  // Return a C string describing the minimizer status\n  const char* minimizer_status_string(MinimizerStatus status);\n\n  // Return the order of a minimization algorithm: 0 indicates only\n  // the cost function is required, 1 indicates the first derivative\n  // is required, 2 indicates the second derivative is required, while\n  // -1 indicates that the algorithm is not recognized.\n  inline int minimizer_algorithm_order(MinimizerAlgorithm algo) {\n    switch (algo) {\n    case MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS:\n    case MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT:\n    case MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR:\n      return 1;\n      break;\n    case MINIMIZER_ALGORITHM_LEVENBERG:\n    case MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT:\n     
 return 2;\n      break;\n    default:\n      return -1;\n    }\n  }\n\n  // Convenience function for initializing vectors representing the\n  // lower and upper bounds on state variables\n  inline void minimizer_initialize_bounds(int nx, adept::Vector& x_lower,\n\t\t\t\t\t  adept::Vector& x_upper) {\n    x_lower.resize(nx);\n    x_upper.resize(nx);\n    x_lower = -std::numeric_limits<Real>::max();\n    x_upper =  std::numeric_limits<Real>::max();\n  }\n\n  // A class that can minimize a function using various algorithms\n  class Minimizer {\n\n  public:\n\n    // Tedious C++98 initializations\n    Minimizer(MinimizerAlgorithm algo) {\n      initialize_default_settings();\n      set_algorithm(algo);\n    }\n\n    Minimizer(const std::string& algo) {\n      initialize_default_settings();\n      set_algorithm(algo);\n    }\n\n    void initialize_default_settings() {\n      max_iterations_ = 100; // <=0 means no limit\n      max_step_size_ = -1.0;\n      converged_gradient_norm_ = 0.1;\n      ensure_updated_state_ = -1;\n      levenberg_damping_min_ = 1.0/128.0;\n      levenberg_damping_max_ = 100000.0;\n      levenberg_damping_multiplier_ = 2.0;\n      levenberg_damping_divider_ = 5.0;\n      levenberg_damping_start_ = 0.0;\n      levenberg_damping_restart_ = 1.0/4.0;\n      max_line_search_iterations_ = 10;\n      armijo_coeff_ = 1.0e-4;\n      cg_curvature_coeff_ = 0.1;\n      lbfgs_curvature_coeff_ = 0.9;\n      lbfgs_n_states_ = 6;\n    }\n\n    // Unconstrained minimization\n    MinimizerStatus minimize(Optimizable& optimizable, Vector x);\n    // Constrained minimization\n    MinimizerStatus minimize(Optimizable& optimizable, Vector x,\n\t\t\t     const Vector& x_lower, const Vector& x_upper);\n\n    // Functions to set parameters defining the general behaviour of\n    // minimization algorithms\n    void set_algorithm(MinimizerAlgorithm algo) { algorithm_ = algo; }\n    void set_algorithm(const std::string& algo);\n    void set_max_iterations(int mi)           
  { max_iterations_ = mi; }\n    void set_converged_gradient_norm(Real cgn)  { converged_gradient_norm_ = cgn; }\n    void set_max_step_size(Real mss)            { max_step_size_ = mss; }\n\n    // Ensure that the last call to compute the cost function uses the\n    // \"solution\" state vector returned by minimize. This ensures that\n    // any variables in user classes that inherit from Optimizable are\n    // up to date with the returned state vector. The \"order\" argument\n    // indicates which the order of derivatives required (provided\n    // they are supported by the minimizing algorithm):\n    // 0=cost_function, 1=cost_function_gradient,\n    // 2=cost_function_gradient_hessian.\n    void ensure_updated_state(int order = 2)    { ensure_updated_state_ = order; }\n    \n    // Return parameters defining behaviour of minimization algorithms\n    MinimizerAlgorithm algorithm() { return algorithm_; }\n    std::string algorithm_name();\n    int max_iterations() { return max_iterations_; }\n    Real converged_gradient_norm() { return converged_gradient_norm_; }      \n\n    // Functions to set parameters defining the behaviour of the\n    // Levenberg and Levenberg-Marquardt algorithm\n    void set_levenberg_damping_limits(Real damp_min, Real damp_max);\n    void set_levenberg_damping_start(Real damp_start);\n    void set_levenberg_damping_restart(Real damp_restart);\n    void set_levenberg_damping_multiplier(Real damp_multiply, Real damp_divide);\n\n    // Functions to set parameters used by the L-BFGS and\n    // Conjugate-Gradient algorithms\n    void set_max_line_search_iterations(int mi) { max_line_search_iterations_ = mi; }\n    void set_armijo_coeff(Real ac)              {\n      if (ac <= 0.0 || ac >= 1.0) {\n\tthrow optimization_exception(\"Armijo coefficient must be greater than 0 and less than 1\");\n      }\n      else {\n\tarmijo_coeff_ = ac;\n      }\n    }\n    void set_lbfgs_curvature_coeff(Real lcc) {\n      if (lcc <= 0.0 || lcc >= 1.0) 
{\n\tthrow optimization_exception(\"L-BFGS curvature coefficient must be greater than 0 and less than 1\");\n      }\n      else {\n\tlbfgs_curvature_coeff_ = lcc;\n      }\n    }\n    void set_cg_curvature_coeff(Real cgcc) {\n      if (cgcc <= 0.0 || cgcc >= 1.0) {\n\tthrow optimization_exception(\"Conjugate-Gradient curvature coefficient must be greater than 0 and less than 1\");\n      }\n      else {\n\tcg_curvature_coeff_ = cgcc;\n      }\n    }\n\n    // Query aspects of the algorithm progress after it has completed\n    int  n_iterations()        const { return n_iterations_; }\n    int  n_samples()           const { return n_samples_; }\n    Real cost_function()       const { return cost_function_; }\n    Real gradient_norm()       const { return gradient_norm_; }\n    Real start_cost_function() const { return start_cost_function_; }\n    MinimizerStatus status()   const { return status_; }\n\n  protected:\n\n    // Specific minimization algorithms\n\n    // The Limited-Memory Broyden-Fletcher-Goldfarb-Shanno algorithm\n    MinimizerStatus \n    minimize_limited_memory_bfgs(Optimizable& optimizable, Vector x);\n    MinimizerStatus\n    minimize_limited_memory_bfgs_bounded(Optimizable& optimizable, Vector x,\n\t\t\t\t\t const Vector& min_x,\n\t\t\t\t\t const Vector& max_x);\n\n    // The Conjugate-Gradient algorithm; Polak-Ribiere by default,\n    // optionally Fletcher-Reeves\n    MinimizerStatus\n    minimize_conjugate_gradient(Optimizable& optimizable, Vector x,\n\t\t\t\tbool use_fletcher_reeves = false);\n    MinimizerStatus\n    minimize_conjugate_gradient_bounded(Optimizable& optimizable, Vector x,\n\t\t\t\t\tconst Vector& min_x,\n\t\t\t\t\tconst Vector& max_x,\n\t\t\t\t\tbool use_fletcher_reeves = false);\n\n    // The Levenberg-Marquardt algorithm; if use_additive_damping is\n    // true then the Levenberg algorithm is used instead\n    MinimizerStatus\n    minimize_levenberg_marquardt(Optimizable& optimizable, Vector x,\n\t\t\t\t bool 
use_additive_damping = false);\n    MinimizerStatus\n    minimize_levenberg_marquardt_bounded(Optimizable& optimizable, Vector x,\n\t\t\t\t\t const Vector& min_x,\n\t\t\t\t\t const Vector& max_x,\n\t\t\t\t\t bool use_additive_damping = false);\n\n    // Perform line search starting at state vector \"x\" with gradient\n    // vector \"gradient\", and initial step \"step_size\" in\n    // un-normalized direction \"direction\". Successful minimization of\n    // the function (according to Wolfe conditions) will lead to\n    // MINIMIZER_STATUS_SUCCESS being returned, the new state stored\n    // in \"x\", and if state_up_to_date >= 1 then the gradient stored\n    // in \"gradient\". Other possible return values are\n    // MINIMIZER_STATUS_FAILED_TO_CONVERGE and\n    // MINIMIZER_STATUS_DIRECTION_UPHILL if the initial direction\n    // points uphill, or MINIMIZER_STATUS_INVALID_COST_FUNCTION,\n    // MINIMIZER_STATUS_INVALID_GRADIENT or\n    // MINIMIZER_STATUS_BOUND_REACHED. First the minimum is bracketed,\n    // then a cubic polynomial is fitted to the values and gradients\n    // of the function at the two points in order to select the next\n    // test point.\n    MinimizerStatus\n    line_search(Optimizable& optimizable, Vector x, const Vector& direction,\n\t\tVector test_x, Real& abs_step_size,\n\t\tVector gradient, int& state_up_to_date,\n\t\tReal curvature_coeff, Real bound_step_size = -1.0);\n\n    // Compute the cost function \"cf\" and gradient vector \"gradient\",\n    // along with the scalar gradient \"grad\" in the search direction\n    // \"direction\" (normalized with \"dir_scaling\"), from the state\n    // vector \"x\" plus a step \"step_size\" in the search direction. If\n    // the resulting cost function and gradient satisfy the Wolfe\n    // conditions for sufficient convergence, copy the new state\n    // vector to \"x\" and the step size to \"final_step_size\", and\n    // return MINIMIZER_STATUS_SUCCESS.  
Otherwise, return\n    // MINIMIZER_STATUS_NOT_YET_CONVERGED.  Error conditions\n    // MINIMIZER_STATUS_INVALID_COST_FUNCTION and\n    // MINIMIZER_STATUS_INVALID_GRADIENT are also possible.\n    MinimizerStatus\n    line_search_gradient_check(Optimizable& optimizable, Vector x, \n\t\t\t       const Vector& direction,\n\t\t\t       Vector test_x, Real& final_step_size,\n\t\t\t       Vector gradient, int& state_up_to_date,\n\t\t\t       Real step_size, Real grad0, Real dir_scaling,\n\t\t\t       Real& cost_function, Real& grad,\n\t\t\t       Real curvature_coeff);\n\n    // DATA\n\n    // Minimizer type\n    MinimizerAlgorithm algorithm_;\n\n    // Variables controling the general behaviour of the minimizer,\n    // used by all gradient-based algorithms\n    int max_iterations_; // <=0 means no limit\n    Real max_step_size_;\n    Real converged_gradient_norm_;\n    int ensure_updated_state_;\n\n    // Variables controling the specific behaviour of the\n    // Levenberg-Marquardt minimizer\n    Real levenberg_damping_min_;\n    Real levenberg_damping_max_;\n    Real levenberg_damping_multiplier_;\n    Real levenberg_damping_divider_;\n    Real levenberg_damping_start_;\n    Real levenberg_damping_restart_;\n\n    // Variable used by the Conjugate-Gradient and L-BFGS minimizers\n    int max_line_search_iterations_;\n    // Armijo condition determined by this coefficient, the first of\n    // the two Wolfe conditions\n    Real armijo_coeff_;\n\n    // Variables controlling the specific behaviour of the Conjugate\n    // Gradient minimizer\n    // Gradient in search direction must reduce by this amount\n    Real cg_curvature_coeff_;\n\n    // Variables controlling specific behaviour of L-BFGS minimizer\n    // Gradient in search direction must reduce by this amount\n    Real lbfgs_curvature_coeff_;\n    // Number of prevous states to store\n    int lbfgs_n_states_;\n\n    // Variables set during the running of an algorithm and available\n    // to the user 
afterwards\n\n    // Number of iterations that successfully reduced the cost function\n    int n_iterations_;\n\n    // Number of calculations of the cost function\n    int n_samples_;\n\n    Real start_cost_function_;\n    Real cost_function_;\n    Real gradient_norm_;\n    MinimizerStatus status_;\n  };\n\n  // Implement inline member functions\n\n  // Functions to set parameters defining the behaviour of the\n  // Levenberg and Levenberg-Marquardt algorithm\n  inline void \n  Minimizer::set_levenberg_damping_limits(Real damp_min, Real damp_max) {\n    if (damp_min <= 0.0) {\n      throw optimization_exception(\"Minimum damping factor in Levenberg-Marquardt algorithm must be positive\");\n    }\n    else if (damp_max <= damp_min) {\n      throw optimization_exception(\"Maximum damping factor must be greater than minimum in Levenberg-Marquardt algorithm\");\n    }\n    levenberg_damping_min_ = damp_min;\n    levenberg_damping_max_ = damp_max;\n  }\n  inline void \n  Minimizer::set_levenberg_damping_start(Real damp_start) {\n    if (damp_start < 0.0) {\n      throw optimization_exception(\"Start damping factor in Levenberg-Marquardt algorithm must be positive or zero\");\n    }\n    levenberg_damping_start_ = damp_start;\n  }\n  inline void \n  Minimizer::set_levenberg_damping_restart(Real damp_restart) {\n    if (damp_restart <= 0.0) {\n      throw optimization_exception(\"Restart damping factor in Levenberg-Marquardt algorithm must be positive\");\n    }\n    levenberg_damping_restart_ = damp_restart;\n  }\n  inline void \n  Minimizer::set_levenberg_damping_multiplier(Real damp_multiply,\n\t\t\t\t\t      Real damp_divide) {\n    if (damp_multiply <= 1.0 || damp_divide <= 1.0) {\n      throw optimization_exception(\"Damping multipliers in Levenberg-Marquardt algorithm must be greater than one\");\n    }\n    levenberg_damping_multiplier_ = damp_multiply;\n    levenberg_damping_divider_    = damp_divide;\n  }\n\n};\n\n#endif\n"
  },
  {
    "path": "include/adept/Optimizable.h",
    "content": "/* Optimizable.h -- abstract base classes representing an optimization problem\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptOptimizable_H\n#define AdeptOptimizable_H 1\n\n#include <adept_arrays.h>\n\nnamespace adept {\n\n  // A class representing an optimization problem that can be solved\n  // by Adept's Minimizer class. The user should define their own\n  // class that publicly inherits from Optimizable and overrides the\n  // member functions calc_cost_function and provides_derivative.\n  // This is the minimum requirement to use in gradient-free\n  // minimization algorithms (e.g. Nelder-Mead). To use in\n  // quasi-Newton and conjugate-gradient minimization algorithms, the\n  // user should also override the member function\n  // calc_cost_function_gradient. To use in Newton-type minimization\n  // algorithms such as Gauss-Newton and Levenberg-Marquardt, the user\n  // should also override the member function\n  // calc_cost_function_gradient_hessian.  
The user may optionally\n  // override report_progress.\n  class Optimizable {\n  public:\n    virtual ~Optimizable() { }\n\n    // Return the cost function corresponding to the state vector x.\n    virtual Real calc_cost_function(const adept::Vector& x) = 0;\n\n    // Return the cost function corresponding to the state vector x,\n    // and also set the \"gradient\" argument to the gradient of the\n    // cost function with respect to each element of x.\n    virtual Real calc_cost_function_gradient(const adept::Vector& x,\n\t\t\t\t\t     adept::Vector gradient) {\n      // If we get here then a gradient-based minimizer has been\n      // applied to this class but the user has not implemented a\n      // function to compute the gradient.\n      throw optimization_exception(\"Gradient calculation has not been implemented\");\n    }\n   \n    // Return the cost function corresponding to the state vector x,\n    // and set the \"gradient\" argument to the gradient of the cost\n    // function with respect to each element of x, and \"hessian\" to\n    // the second derivative of the cost function with respect to x.\n    virtual Real calc_cost_function_gradient_hessian(const adept::Vector& x,\n\t\t     adept::Vector gradient, adept::SymmMatrix& hessian) {\n      // If we get here then a Newton-type minimizer has been applied\n      // to this class but the user has not implemented a function to\n      // compute the Hessian matrix.\n      throw optimization_exception(\"Hessian calculation has not been implemented\");\n    }\n\n    // This function is called at every iteration, and can be\n    // overridden by child classes to report or store the progress at\n    // each iteration, if required. 
By default it does nothing.\n    virtual void report_progress(int niter, const adept::Vector& x,\n\t\t\t\t Real cost, Real gnorm) { }\n\n    // Child classes should override this function to provide a\n    // run-time mechanism to check which of the first and second\n    // derivative (i.e. gradient and Hessian, respectively) are\n    // available.  If only the gradient is available then it could be\n    // implemented as: if (order == 0 || order == 1) { return true; }\n    // else { return false; }\n    virtual bool provides_derivative(int order) = 0;\n\n  };\n\n};\n\n#endif\n"
  },
  {
    "path": "include/adept/Packet.h",
    "content": "/* Packet.h -- Vectorization support\n\n    Copyright (C) 2016-2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   A Packet contains a short vector of values, and when it is used in\n   a limited set of arithmetic operations, the appropriate vector\n   instructions will be used.  For example if your hardware and\n   compiler support SSE2 then Packet<float> is a vector of 4x 4-byte\n   floats while Packet<double> is a vector of 2x 8-byte floats. This\n   header file also provides for allocating aligned data\n*/\n\n#ifndef AdeptPacket_H\n#define AdeptPacket_H 1\n\n#include <iostream>\n#include <cstdlib>\n#include <cmath>\n\n// Headers needed for allocation of aligned memory\n#include <new>\n\n#ifdef __unix__\n#include <unistd.h>  // Defines _POSIX_VERSION\n#endif\n#include <stdlib.h>\n#ifdef _MSC_VER\n#include <malloc.h> // Provides _aligned_malloc on Windows\n#endif\n\n#include <adept/quick_e.h>\n#include <adept/base.h>\n\n// -------------------------------------------------------------------\n// Determine how many floating point values will be held in a packet\n// -------------------------------------------------------------------\n\n#ifndef ADEPT_FLOAT_PACKET_SIZE\n#define ADEPT_FLOAT_PACKET_SIZE QE_LONGEST_FLOAT_PACKET\n//static const int ADEPT_FLOAT_PACKET_SIZE = quick_e::longest_packet<float>::size;\n#endif\n#ifndef ADEPT_DOUBLE_PACKET_SIZE\n#define ADEPT_DOUBLE_PACKET_SIZE QE_LONGEST_DOUBLE_PACKET\n//static const int ADEPT_DOUBLE_PACKET_SIZE = quick_e::longest_packet<double>::size\n#endif\n\n// -------------------------------------------------------------------\n// Determine how many floating point values will be held in packet of Real\n// -------------------------------------------------------------------\n\n#if ADEPT_REAL_TYPE_SIZE == 4\n#define ADEPT_REAL_PACKET_SIZE ADEPT_FLOAT_PACKET_SIZE\n#elif ADEPT_REAL_TYPE_SIZE == 8\n#define 
ADEPT_REAL_PACKET_SIZE ADEPT_DOUBLE_PACKET_SIZE\n#else\n#define ADEPT_REAL_PACKET_SIZE 1\n#endif\n\nnamespace adept {\n\n  namespace internal {\n\n    // Trait to define packet size\n    template <typename T> struct packet_traits\n    { static const int size = 1; };\n    template <> struct packet_traits<float>\n    { static const int size = ADEPT_FLOAT_PACKET_SIZE; };\n    template <> struct packet_traits<double>\n    { static const int size = ADEPT_DOUBLE_PACKET_SIZE; };\n    \n\n    // -------------------------------------------------------------------\n    // Define packet type\n    // -------------------------------------------------------------------\n\n    // Unfortunately, with C++98, unions cannot contain std::complex\n    // because ith as a constructor... therefore Packet inherits from\n    // PacketData to contain the data in order that union is only used\n    // for Packets of types that are actually vectorized (which are\n    // floats and doubles).\n    template <typename T, class Enable = void>\n    struct PacketData {\n      // Static definitions\n      static const int size = packet_traits<T>::size;\n      typedef typename quick_e::packet<T,size>::type intrinsic_type;\n      PacketData(intrinsic_type d) : data(d) { }\n      union {\n\tintrinsic_type data;\n\tT value_[size];\n      };\n      T value() const { return value_[0]; }\n      T& operator[](int i) { return value_[i]; }\n      const T& operator[](int i) const { return value_[i]; }\n    };\n    template <typename T>\n    struct PacketData<T, typename enable_if<packet_traits<T>::size == 1>::type>\n    {\n      // Static definitions\n      static const int size = 1;\n      typedef T intrinsic_type;\n      PacketData(intrinsic_type d) : data(d) { }\n      T data;\n      T value() const { return data; }\n      T& operator[](int i) { return data; }\n      const T& operator[](int i) const { return data; }\n    };\n    \n    template <typename T>\n    struct Packet : public PacketData<T> {\n      
using PacketData<T>::data;\n      static const int size = packet_traits<T>::size;\n      typedef typename quick_e::packet<T,size>::type intrinsic_type;\n      //      static const int intrinsic_size = 1; // What is this for?\n      static const std::size_t alignment_bytes = sizeof(intrinsic_type);\n       // T=float/double -> all bits = 1\n      static const std::size_t align_mask = (size == 1) ? -1 : alignment_bytes-1;\n      static const bool        is_vectorized = (size > 1);\n      // Constructors\n      Packet() : PacketData<T>(quick_e::set0<intrinsic_type>()) { }\n      Packet(const Packet& d) : PacketData<T>(d.data) { }\n      template <typename TT>\n      Packet(TT d, typename enable_if<is_same<TT,intrinsic_type>::value,int>::type = 0)\n\t: PacketData<T>(d) { }\n      explicit Packet(const T* d) : PacketData<T>(quick_e::load<intrinsic_type>(d)) { }\n      //      explicit Packet(T d) : PacketData<T>(quick_e::set1<intrinsic_type>(d)) { }\n      template <typename TT>\n      explicit Packet(TT d, typename enable_if<is_same<TT,T>::value&&is_vectorized,int>::type = 0)\n\t: PacketData<T>(quick_e::set1<intrinsic_type>(d)) { }\n      // Member functions\n      void put(T* __restrict d) const { quick_e::store(d, data); }\n      void put_unaligned(T* __restrict d) const { quick_e::storeu(d, data); }\n      //      void operator=(T d)              { data = quick_e::set1<intrinsic_type>(d); }\n      template <typename TT> //, typename enable_if<is_same<T,TT>::value||is_same<T,intrinsic_type>::value,int>::type = 0>\n      void operator=(TT d)              { data = quick_e::set1<intrinsic_type>(d); }\n      //      void operator=(intrinsic_type d) { data = d;       }\n      void operator=(const Packet& d)  { data = d.data;  }\n      void operator+=(const Packet& d) { data = quick_e::add(data, d.data); }\n      void operator-=(const Packet& d) { data = quick_e::sub(data, d.data); }\n      void operator*=(const Packet& d) { data = quick_e::mul(data, d.data); }\n      void 
operator/=(const Packet& d) { data = quick_e::div(data, d.data); }\n      Packet operator-() const         { return quick_e::neg(data); }\n      Packet operator+() const         { return *this; }\n    };\n\n    //#define QE_PACKET_ARG Packet<T>\n    #define QE_PACKET_ARG const Packet<T>& __restrict\n        \n    // Default functions\n    template <typename T> Packet<T> operator+(QE_PACKET_ARG x, QE_PACKET_ARG y)\n    { return quick_e::add(x.data,y.data); }\n    template <typename T> Packet<T> operator-(QE_PACKET_ARG x, QE_PACKET_ARG y)\n    { return quick_e::sub(x.data,y.data); }\n    template <typename T> Packet<T> operator*(QE_PACKET_ARG x, QE_PACKET_ARG y)\n    { return quick_e::mul(x.data,y.data); }\n    template <typename T> Packet<T> operator/(QE_PACKET_ARG x, QE_PACKET_ARG y)\n    { return quick_e::div(x.data,y.data); }\n    template <typename T> Packet<T> fmin(QE_PACKET_ARG x, QE_PACKET_ARG y)\n    { return quick_e::fmin(x.data,y.data); }\n    template <typename T> Packet<T> fmax(QE_PACKET_ARG x, QE_PACKET_ARG y)\n    { return quick_e::fmax(x.data,y.data); }\n    template <typename T> Packet<T> sqrt(QE_PACKET_ARG x) {\n      using std::sqrt;\n      using quick_e::sqrt;\n      return sqrt(x.data);\n    }\n    template <typename T> Packet<T> fastexp(QE_PACKET_ARG x) {\n      return quick_e::exp(x.data);\n    }\n#ifdef ADEPT_FAST_EXPONENTIAL\n    template <typename T> Packet<T> exp(QE_PACKET_ARG x) {\n      return quick_e::exp(x.data);\n    }\n#else\n    template <typename T> Packet<T> exp(QE_PACKET_ARG x) {\n      return std::exp(x.data);\n    }\n#endif\n\n    template <typename T> T hsum(QE_PACKET_ARG x)  { return quick_e::hsum(x.data); }\n    template <typename T> T hprod(QE_PACKET_ARG x) { return quick_e::hmul(x.data); }\n    template <typename T> T hmin(QE_PACKET_ARG x)  { return quick_e::hmin(x.data); }\n    template <typename T> T hmax(QE_PACKET_ARG x)  { return quick_e::hmax(x.data); }\n\n    template <typename T>\n    std::ostream& 
operator<<(std::ostream& os, QE_PACKET_ARG x) {\n      os << \"{\";\n      for (int i = 0; i < Packet<T>::size; ++i) {\n\tos << \" \" << x[i];\n      }\n      os << \"}\";\n      return os;\n    }\n\n    // -------------------------------------------------------------------\n    // Aligned allocation and freeing of memory\n    // -------------------------------------------------------------------\n    template <typename Type>\n    inline\n    Type* alloc_aligned(Index n) {\n      std::size_t n_align = Packet<Type>::alignment_bytes;\n      if (n_align < sizeof(void*)) {\n\t// Note that the requested byte alignment passed to\n\t// posix_memalign must be at least sizeof(void*)\n\treturn new Type[n];\n      }\n      else {\n\tType* result;\n#ifdef _POSIX_VERSION\n#if _POSIX_VERSION >= 200112L\n\tif (posix_memalign(reinterpret_cast<void**>(&result), \n\t\t\t   n_align, n*sizeof(Type)) != 0) {\n\t  throw std::bad_alloc();\n\t}\n#else\n\tresult = new Type[n];\n#endif\n#elif defined(_MSC_VER)\n\tresult = reinterpret_cast<Type*>(_aligned_malloc(n*sizeof(Type),\n\t\t\t\t\t\t\t n_align));\n\tif (result == 0) {\n\t  throw std::bad_alloc();\n\t}\n#else\n\tresult = new Type[n];\t\n#endif\n      return result;\n      }\n    }\n    \n    template <typename Type>\n    inline\n    void free_aligned(Type* data) {\n      // Note that we need to use the same condition as used in\n      // alloc_aligned() in order that new[] is followed by delete[]\n      // and posix_memalign is followed by free\n      if (Packet<Type>::alignment_bytes < sizeof(void*)) {\n\tdelete[] data;\n      }\n      else { \n#ifdef _POSIX_VERSION\n#if _POSIX_VERSION >= 200112L   \n\tfree(data);\n#else\n\tdelete[] data;\n#endif\n#elif defined(_MSC_VER)\n\t_aligned_free(data);\n#else\n\tdelete[] data;\n#endif\n      }\n    }\n\n\n    // -------------------------------------------------------------------\n    // Check if templated object is a packet: is_packet\n    // 
-------------------------------------------------------------------\n    template <typename T>\n    struct is_packet {\n      static const bool value = false;\n    };\n    template <typename T>\n    struct is_packet<Packet<T> > {\n      static const bool value = true;\n    };\n\n  } // End namespace internal\n\n\n  // -------------------------------------------------------------------\n  // Fast exponential function\n  // -------------------------------------------------------------------\n\n#ifdef ADEPT_FAST_SCALAR_EXPONENTIAL\n  // Bring scalar exp from quick_e into this namespace\n  inline float  exp(float x)  { return quick_e::exp(x); }\n  inline double exp(double x) { return quick_e::exp(x); }\n#endif\n  inline float  fastexp(float x)  { return quick_e::exp(x); }\n  inline double fastexp(double x) { return quick_e::exp(x); }\n\n  // This namespace is only for use in array operations\n  namespace functions {\n#ifdef ADEPT_FAST_EXPONENTIAL\n    // Bring scalar exp from quick_e into this namespace\n    inline float  exp(float x)  { return quick_e::exp(x); }\n    inline double exp(double x) { return quick_e::exp(x); }\n#else\n    inline float  exp(float x)  { return std::exp(x); }\n    inline double exp(double x) { return std::exp(x); }\n#endif\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/RangeIndex.h",
    "content": "/* RangeIndex.h -- Helper classes to enable indexing of arrays\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   If an Array is indexed via A(i,j,...) then there are three possible\n   return values: (1) a scalar, if all indices are scalar integers\n   (including 0-rank expressions such as \"end\"); (2) an Array that\n   links to a subset of the data in the original Array, if one or more\n   of the indices is a RangeIndex object and all the rest are scalar\n   integers; and (3) an IndexedArray object, if one or more of the\n   indices is a vector of integers.  All of these return values can be\n   used on the left-hand-side of an expression.\n\n   This file defines the RangeIndex class and associated helper types\n   that facilitate the second case.  A RangeIndex object expresses a\n   sequence of regularly spaced integers, which may have a separation\n   greater than 1 or a negative separation.  Since an Array need not\n   be contiguous in memory, when an Array is indexed by one or more\n   RangeIndex objects the result is also a valid Array.  RangeIndex\n   objects are created by the range(begin,end) and\n   stride(begin,end,stride) functions.\n\n   This file also includes the EndIndex class to enable the use of\n   \"end\" to express the final element of an array dimension being\n   indexed (as in Matlab), and the AllIndex class to enable the use of\n   \"__\" to express all elements of a dimension (as \":\" in Fortran 90\n   and Matlab).\n\n*/\n\n\n#ifndef AdeptRangeIndex_H\n#define AdeptRangeIndex_H 1\n\n#include <adept/Expression.h>\n\nnamespace adept {\n\n  namespace internal {\n    // ---------------------------------------------------------------------\n    // Section 1. 
EndIndex: enable Matlab-like \"end\" indexing\n    // ---------------------------------------------------------------------\n\n    // When an integer Expression is used as the index to another\n    // expression, make \"end\" (or \"adept::end\") be interpreted as the\n    // index of the final element of the array dimension being\n    // referred to. If a whole multi-dimensional array is referred to\n    // by a single integer Expression, then \"end\" is resolved to the\n    // len-1 (\"len\" being the length of the dimension being indexed).\n    // \"end\" is actually an instantiation of the \"EndIndex\" class, a\n    // rank-0 expression.\n    struct EndIndex : public Expression<Index, EndIndex>\n    {\n      // Static definitions\n      static const int  rank       = 0;\n      static const bool is_active  = false;\n      static const int  n_scratch  = 0;\n      static const int  n_arrays   = 0;\n      static const int  n_active   = 0;\n      \n      // Functions to implement Expression behaviour\n\n      bool get_dimensions_(ExpressionSize<0>& dim) const\n      { return true; }\n      \n      std::string expression_string_() const\n      { return std::string(\"end\"); }\n\n      bool is_aliased_(const Index* mem1, const Index* mem2) const\n      { return false; }\n\n      Index value_with_len_(const Index& j, const Index& len) const\n      { return len-1; }\n\n      // Note that \"end\" can only be used as an index to an array or\n      // expression: when used in any other context it will fail.\n      template <int Rank>\n      Index value_at_location_(const ExpressionSize<Rank>&) const\n      { throw array_exception(\"Cannot determine to which object the \\\"end\\\" index refers to\"\n\t\t\t      ADEPT_EXCEPTION_LOCATION); }\n    };\n    \n    // ---------------------------------------------------------------------\n    // Section 2. 
get_index_with_len\n    // ---------------------------------------------------------------------\n    // We want range(x,y) and stride(x,y,z) to work for integer\n    // arguments or for 0-rank expressions (including \"end\" and\n    // constructs such as \"end - 1\"), so define the following helper\n    // function. For an integer first argument, \"get_index_with_len\"\n    // just returns the first argument, but for 0-rank expressions of\n    // integer type, the second argument \"len\" is passed in and if the\n    // expression contains an \"end\" then this resolves to len-1.\n\n#ifndef ADEPT_BOUNDS_CHECKING\n    inline Index get_index_with_len(Index j, Index) { return j; }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && E::rank == 0, Index>::type\n    get_index_with_len(const Expression<T,E>& j, Index len) {\n      return j.value_with_len(0, len);\n    }\n#else\n    // Bounds-checking versions\n    inline Index get_index_with_len(Index j, Index len) {\n      if (j < 0 || j >= len) {\n\tthrow index_out_of_bounds();\n      }\n      else {\n\treturn j; \n      }\n    }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && E::rank == 0, Index>::type\n    get_index_with_len(const Expression<T,E>& j, Index len) {\n      Index ind = j.value_with_len(0, len);\n      if (ind < 0 || ind >= len) {\n\tthrow index_out_of_bounds(\"Array index (probably generated from a scalar expression containing \\\"end\\\") is out of bounds\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\treturn ind;\n      }\n    }\n#endif\n\n    // get_stride_with_len is just like get_index_with_len except that\n    // there is no need to do bounds checking\n    inline Index get_stride_with_len(Index j, Index) { return j; }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       
&& E::rank == 0, Index>::type\n    get_stride_with_len(const Expression<T,E>& j, Index len) {\n      return j.value_with_len(0, len);\n    }\n\n    // ---------------------------------------------------------------------\n    // Section 3. get_value\n    // ---------------------------------------------------------------------\n    // If a RangeIndex object is not to be used as an index to an\n    // array, we may wish to access its elements without consideration\n    // of the length of a dimension.\n\n    inline Index get_value(Index j) { return j; }\n\n    template <typename T, class E>\n    inline\n    typename enable_if<std::numeric_limits<T>::is_integer\n\t\t       && E::rank == 0, Index>::type\n    get_value(const Expression<T,E>& j) {\n      return j.scalar_value();\n    }\n\n    // ---------------------------------------------------------------------\n    // Section 3. RangeIndex class\n    // ---------------------------------------------------------------------\n    // A class to store a range of integers, optionally with a fixed\n    // stride, for simple indexing of arrays. 
\n    template<class BeginType, class EndType, class StrideType>\n    class RangeIndex\n      : public Expression<Index, RangeIndex<BeginType, EndType, StrideType> >\n    {\n    public:\n      static const int  rank       = 1;\n      static const bool is_active  = false;\n      static const int  n_scratch  = 0;\n      static const int  n_arrays   = 1;\n      static const int  n_active   = 0;\n      \n      // Construct with a specified stride\n      RangeIndex(const BeginType& begin, const EndType& end, \n\t\t const StrideType& stride)\n\t: begin_(begin), end_(end), stride_(stride)\n      { };\n\n      // Construct without a specified stride: defaults to 1\n      RangeIndex(const BeginType& begin, const EndType& end)\n\t: begin_(begin), end_(end), stride_(1)\n      { };\n\n      Index size() const \n      { return (end() - begin() + stride()) / stride(); }\n\n      Index size_with_len_(const Index& len) const\n      { return (end(len) - begin(len) + stride(len)) / stride(len); }\n\n      bool get_dimensions_(ExpressionSize<1>& dim) const {\n\tdim[0] = size();\n\treturn true;\n      }\n      std::string expression_string_() const {\n\tstd::stringstream s;\n\ts << \"(\" << begin() << \":\" << end();\n\tIndex str = stride();\n\tif (str != 1) {\n\t  s << \":\" << str;\n\t}\n\ts << \")\";\n\treturn s.str();\n      }\n\n      bool is_aliased_(const Index* mem1, const Index* mem2) const {\n\treturn false;\n      }\n\n      bool all_arrays_contiguous_() const { return true; }\n\n      // When this object is used as an index to another, the\n      // following version of the function is called, in which the\n      // \"len\" element is specified in order for the \"end\" index\n      // specifier to work\n      Index value_with_len_(const Index&j, const Index& len) const \n      { return begin(len) + stride(len)*j; }\n\n      // Advance the location of each array in the expression\n      template <int MyArrayNum, int NArrays>\n      void 
advance_location_(ExpressionSize<NArrays>& loc) const {\n\t++loc[MyArrayNum];\n      }\n\n      template <int MyArrayNum, int NArrays>\n      void set_location_(const ExpressionSize<1>& i, \n\t\t\t ExpressionSize<NArrays>& index) const { }\n\n      // Give the value at a particular offset\n      template <int MyArrayNum, int NArrays>\n      Index value_at_location_(const ExpressionSize<NArrays>& j) const \n      { return begin() + stride()*j[MyArrayNum]; }\n\n      // Access the beginning, end and stride, where the argument\n      // gives the length of the dimension in case any of these is\n      // expressed with respect to \"end\" (which resolves to length-1)\n      Index begin()  const { return get_value(begin_);  }\n      Index end()    const { return get_value(end_);    }\n      Index stride() const { return get_value(stride_); }\n      Index begin(Index len) const\n      {\treturn get_index_with_len(begin_, len); }\n      Index end(Index len) const \n      { return get_index_with_len(end_, len); }\n      Index stride(Index len) const\n      { return get_stride_with_len(stride_, len); }\n\n    private:\n      // Note that a copy rather than a reference to the Expression or\n      // int is stored: this is because if range(i1, i2) is used as\n      // the index to another object, then a temporary object will be\n      // created that will be destroyed immediately after calling the\n      // RangeIndex constructor (following ANSI C++ rules), so a\n      // reference would then point to invalid data.\n      // FIX!!!\n      const BeginType begin_;\n      const EndType end_;\n      const StrideType stride_;\n    };\n\n    // ---------------------------------------------------------------------\n    // Section 4. 
AllIndex class\n    // ---------------------------------------------------------------------\n    // A class to represent all elements along one dimension, for simple\n    // indexing of arrays with \"__\" (equivalent to \":\" in Fortran).\n    class AllIndex : public Expression<Index, AllIndex>\n    {\n    public:\n      static const int  rank      = 1;\n      static const bool is_active = false;\n      static const int  n_active  = 0;\n      static const int  n_static_ = 0;\n      static const int  n_arrays  = 0;\n\n      // Unknown!\n      //      bool get_dimensions_(ExpressionSize<1>& dim) const { return true; }      \n\n      std::string expression_string_() const { return std::string(\"__\"); }\n\n      bool is_aliased_(const Index* mem1, const Index* mem2) const { return false; }\n\n      Index size_with_len_(const Index& len) const\n      { return len; }\n\n      Index value_with_len_(const Index& j, const Index& len) const\n      { return j; }\n\n      Index value_at_location_(const ExpressionSize<1>& loc) const\n      { return loc[0]; }\n      \n      Index begin(Index len = -1) const { return 0; }\n      Index end(Index len) const { return len-1; }\n      Index stride(Index len = -1) const { return 1; }\n    };\n\n\n    // is_range<T>::value is true if T is of type RangeIndex or\n    // AllIndex\n    template <typename T>\n    struct is_range {\n      static const bool value = false;\n      static const int  count = 0;\n    };\n    template <>\n    struct is_range<AllIndex> {\n      static const bool value = true;\n      static const int  count = 1;\n    };\n    template <class B, class E, class S>\n    struct is_range<RangeIndex<B,E,S> > {\n      static const bool value = true;\n      static const int  count = 1;\n    };\n    \n    // is_regular_index<T>::value is true if T is a valid index to a\n    // dimension of an Array such that the indexed object is also an\n    // Array\n    template <typename T>\n    struct is_regular_index {\n      static 
const bool value = (is_scalar_int<T>::value\n\t\t\t\t || is_null_type<T>::value\n\t\t\t\t || is_range<T>::value);\n    };\n\n    // is_ranged<>::value is true if at least one of the template\n    // arguments I0 to I[Rank-1] is of type RangeIndex, and all others\n    // are of integer type\n    template <int Rank, typename I0, typename I1 = null_type, \n\t      typename I2 = null_type, typename I3 = null_type,\n\t      typename I4 = null_type, typename I5 = null_type,\n\t      typename I6 = null_type>\n    struct is_ranged {\n      static const bool value = (is_range<I0>::value || is_range<I1>::value\n\t\t\t      || is_range<I2>::value || is_range<I3>::value\n\t\t\t      || is_range<I4>::value || is_range<I5>::value\n\t\t\t      || is_range<I6>::value)\n\t&& Rank == 7 - (  is_null_type<I1>::count + is_null_type<I2>::count\n\t\t\t+ is_null_type<I3>::count + is_null_type<I4>::count\n\t\t\t+ is_null_type<I5>::count + is_null_type<I6>::count)\n\t&& (   is_regular_index<I0>::value && is_regular_index<I1>::value\n\t    && is_regular_index<I2>::value && is_regular_index<I3>::value\n\t    && is_regular_index<I4>::value && is_regular_index<I5>::value\n\t    && is_regular_index<I6>::value);\n      static const int count = is_range<I0>::count + is_range<I1>::count\n\t+ is_range<I2>::count + is_range<I3>::count + is_range<I4>::count\n\t+ is_range<I5>::count + is_range<I6>::count;\n    };\n\n\n\n\n  } // End namespace internal\n\n  // User-accessible functions and objects\n\n  // The actual end object is held in a source file\n  extern ::adept::internal::EndIndex end;\n\n  // The actual \"__\" object is held in a source file\n  extern ::adept::internal::AllIndex __;\n\n  // Return a RangeIndex object representing all the integers between\n  // \"begin\" and \"end\"; the inputs can either be Expressions or ints\n  template<class BeginType, class EndType>\n  inline\n  adept::internal::RangeIndex<BeginType, EndType, int>\n  range(const BeginType& begin, const EndType& end)\n  {\n  
  return adept::internal::RangeIndex<BeginType, EndType, int>(begin, end, 1);\n  }\n\n  // Return a RangeIndex object representing integers between \"begin\"\n  // and \"end\" spaced \"stride\" apart\n  template<class BeginType, class EndType, class StrideType>\n  inline\n  adept::internal::RangeIndex<BeginType, EndType, StrideType>\n  stride(const BeginType& begin, const EndType& end,\n\t const StrideType& stride)\n  {\n    return adept::internal::RangeIndex<BeginType, EndType, \n\t\t\t\t       StrideType>(begin, end, stride);\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/ScratchVector.h",
    "content": "/* ScratchVector.h -- Class for holding temporary real data\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The ScratchVector class is used to store a temporary vector of real\n   numbers (by default the type \"Real\", but could also be\n   Packet<Real>) for use in optimally evaluating an expression and\n   computing its derivative.  Certain parts of the expression need to\n   store their numerical value when first computed since it will be\n   needed again in the derivative computation.  In Adept 1.x such data\n   were stored in the expression objects themselves, e.g. in\n   adept::Multiply, but now that it is not clear at the level of an\n   individual operation whether vectorization will be possible\n   (requiring Packet<Real>), the storage for such scratch data must be\n   held externally.\n\n*/\n\n#ifndef AdeptScratchVector_H\n#define AdeptScratchVector_H\n\n#include <limits>\n\n#include <adept/base.h>\n#include <adept/traits.h>\n\nnamespace adept {\n\n  namespace internal {\n\n    // Definition of ScratchVector class\n    template <int Size, typename Type = Real>\n    class ScratchVector {\n    public:\n      // Constructors\n\n      // By default no initialization is done\n      ScratchVector() {\n#ifdef ADEPT_INIT_REAL\n\tinitialize<Type>();\n#endif\n      }\n\n#ifdef ADEPT_INIT_REAL\n      template <typename T>\n      typename internal::enable_if<internal::is_floating_point<T>::value, void>::type\n      initialize() {\n\tfor (int is = 0; is < Size; ++is) {\n\t  val[is] = ADEPT_INIT_REAL;\n\t}\n      }\n      template <typename T>\n      typename internal::enable_if<!internal::is_floating_point<T>::value, void>::type\n      initialize() { }\n#endif\n\n      // Set all dimensions to the same value\n      ScratchVector(Type x) {\n\tset_all(x);\n      }\n\n      // Specify the values of all elements\n      
ScratchVector(Type x[Size]) {\n\tfor (int i = 0; i < Size; ++i) {\n\t  val[i] = x[i];\n\t}\n      }\n\n      // Assume copy constructor will copy elements of val\n    \n      // Set all to specified value\n      void set_all(Type x) {\n\tfor (int i = 0; i < Size; ++i) {\n\t  val[i] = x;\n\t}\n      }\n\n      // Copy from a ScratchVector object of the same rank\n      void copy(const ScratchVector& d) {\n\tfor (int i = 0; i < Size; ++i) {\n\t  val[i] = d[i];\n\t}\n      }\n      // ...or pointer to raw data\n      void copy(const Type* d) {\n\tfor (int i = 0; i < Size; ++i) {\n\t  val[i] = d[i];\n\t}\n      }\n\n      // Write out contents for debugging\n      std::ostream& write(std::ostream& os) const {\n\tos << \"{\" << val[0];\n\tfor (int i = 1; i < Size; i++) {\n\t  os << \",\" << val[i];\n\t}\n\treturn os << \"}\\n\";\n      }\n\n      // Const and non-const access to elements\n      Type& operator[](int i) { return val[i]; }\n\n      const Type& operator[](int i) const { return val[i]; }\n\n      // Data\n    private:\n      Type val[Size];\n    };\n  \n    // Specialization for scalars (zero-rank arrays) known at compile\n    // time\n    template <>\n      class ScratchVector<0> {\n    public:\n      ScratchVector() { }\n      template <typename T>\n      ScratchVector(T x) { }\n      std::ostream& write(std::ostream& os) const {\n\treturn os << \"{}\\n\";\n      }\n    };\n\n    // Write out all elements for debugging\n    template <int Size>\n    inline\n    std::ostream& operator<<(std::ostream& os, const ScratchVector<Size>& s) {\n      return s.write(os);\n    }\n   \n \n  } // End namespace internal\n\n} // End namespace adept\n\n#endif // AdeptScratchVector_H\n"
  },
  {
    "path": "include/adept/SpecialMatrix.h",
    "content": "/* SpecialMatrix.h -- Active or inactive symmetric and band-diagonal matrices\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The SpecialMatrix is the basis for a wide range of matrix types\n   such as SquareMatrix, DiagonalMatrix, TridiagonalMatrix,\n   SymmetricMatrix etc.\n\n*/\n\n#ifndef AdeptSpecialMatrix_H\n#define AdeptSpecialMatrix_H 1\n\n#include <iostream>\n#include <sstream>\n#include <limits>\n\n#include <adept/base.h>\n#include <adept/Storage.h>\n#include <adept/Expression.h>\n#include <adept/RangeIndex.h>\n#include <adept/ActiveReference.h>\n#include <adept/Array.h>\n#include <adept/FixedArray.h>\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // SpecialMatrix Engine helper classes\n  // -------------------------------------------------------------------\n  enum SymmMatrixOrientation {\n    ROW_LOWER_COL_UPPER=0, ROW_UPPER_COL_LOWER=1\n  };\n\n  namespace internal {\n\n    // -------------------------------------------------------------------\n    // Conventional matrix storage engine\n    // -------------------------------------------------------------------\n\n    // The SpecialMatrix class is assisted by data-free policy classes\n    // that define the behaviour of different matrix types. The first\n    // most basic one is for square matrices. Comments are provided\n    // for the first one only to explain the meaning of each\n    // function. 
The default here is ROW_MAJOR; the alternative\n    // COL_MAJOR is provided as a specialization of this class.\n    template <MatrixStorageOrder Order>\n    struct SquareEngine {\n      // The number of variables to store for a SpecialMatrix when it\n      // is on the right-hand-side of an expression for its location\n      static const int my_n_arrays = 1;\n      // Used by SpecialMatrix::expression_string() to describe the\n      // matrix type\n      const char* name() const { return \"SquareMatrix\"; }\n      // Used by SpecialMatrix::info_string() to describe the matrix\n      // type\n      std::string long_name() const { return \"SquareMatrix<ROW_MAJOR>\"; }\n      // The offset to use (the spacing in memory of elements along\n      // the slowest varying dimension) for \"packed\" data, i.e. when\n      // this matrix is created by the SpecialMatrix::resize function\n      // rather than being a submatrix to something larger.\n      Index pack_offset(Index dim) const { return dim; }\n      // Provide the memory index to the element at row i, column j\n      Index index(Index i, Index j, Index offset) const {\n\treturn i*offset + j;\n      }\n      // When traversing along a row, this is the separation in memory\n      // of each element\n      template <int MyArrayNum, int NArrays>\n      Index row_offset(Index offset, const ExpressionSize<NArrays>& loc) const {\n\treturn 1; \n      }\n      // This function is used when a SpecialMatrix is used on the\n      // left-hand-side of an expression. 
For row i, return the range\n      // of columns containing unique elements in j_start and\n      // j_end_plus_1, the memory location of the element\n      // corresponding to j_start in index_start, and the separation\n      // in memory of consecutive elements in this range\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = 0;\n\tj_end_plus_1 = dim;\n\tindex_start = i*offset;\n\tindex_stride = 1;\n      }\n      // Return value at row i, column j as an rvalue, first in the\n      // case of an inactive array...\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\treturn data[index(i,j,offset)]; \n      }\n      // ...now in the case of an active array.\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\treturn Active<Type>(data[index(i,j,offset)]);\n      }\n      // Return value at row i, column j as an lvalue, first in the\n      // case of an inactive array...\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\treturn data[index(i,j,offset)]; \n      }\n      // ...now in the case of an active array.\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex ind = index(i,j,offset);\n\treturn ActiveReference<Type>(data[ind], gradient_index+ind);\n  
    }\n      // Return the number of elements stored for a SpecialMatrix of\n      // size dim x dim.  This is used both by SpecialMatrix::resize\n      // to know how much memory to allocate, and by\n      // SpecialMatrix::is_aliased to know the memory range spanned by\n      // the object.\n      Index data_size(Index dim, Index offset) const {\n\treturn (dim-1)*offset+dim;\n      }\n      // Memory offset of start of a superdiagonal (offdiag > 0)\n      Index upper_offset(Index dim, Index offset, Index offdiag) const {\n\treturn offdiag;\n      }\n      // Memory offset of start of a subdiagonal (offdiag < 0)\n      Index lower_offset(Index dim, Index offset, Index offdiag) const {\n\treturn -offdiag*offset;\n      }\n      // Check super- and sub-diagonals are in range, otherwise throw\n      // an exception (errors only thrown for band matrices)\n      void check_upper_diag(Index offdiag) const { }\n      void check_lower_diag(Index offdiag) const { }\n      // The type returned by the transpose .T() member function\n      typedef SquareEngine<COL_MAJOR> transpose_engine;\n      // Extra info to store when traversing a SpecialMatrix on the\n      // right-hand-side of an expression\n      template <int MyArrayNum, int NArrays>\n      void set_extras(Index i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const { }\n      // Return the value at the specified location in memory\n      template <int MyArrayNum, int NArrays, typename Type>\n      Type value_at_location(const Type* data, \n\t\t\t     const ExpressionSize<NArrays>& loc) const {\n\treturn data[loc[MyArrayNum]];\n      }\n      // Push an element of an active SpecialMatrix onto the stack\n      template <int MyArrayNum, int NArrays, typename Type>\n      void push_rhs(Stack& stack, Type multiplier, Index gradient_index,\n\t\t    const ExpressionSize<NArrays>& loc) const {\n\tstack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]);\n      }\n    };\n\n    // The engine for the 
SquareMatrix type using column-major\n    // storage; note that this inherits from the row-major version in\n    // order that functions that don't need to be changed can be\n    // imported using \"using\".\n    template <>\n    struct SquareEngine<COL_MAJOR> : public SquareEngine<ROW_MAJOR> {\n      static const int my_n_arrays = 1;\n      const char* name() const { return \"SquareMatrix\"; }\n      std::string long_name() const { return \"SquareMatrix<COL_MAJOR>\"; }\n      Index pack_offset(Index dim) const { return dim; }\n      Index index(Index i, Index j, Index offset) const {\n\treturn i + j*offset;\n      }\n      template <int MyArrayNum, int NArrays>\n      Index row_offset(Index offset, const ExpressionSize<NArrays>& loc) const {\n\treturn offset; \n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = 0;\n\tj_end_plus_1 = dim;\n\tindex_start = i;\n\tindex_stride = offset;\n      }\n\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\treturn data[index(i,j,offset)]; \n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\treturn Active<Type>(data[index(i,j,offset)]);\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\treturn data[index(i,j,offset)]; \n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      
get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex ind = index(i,j,offset);\n\treturn ActiveReference<Type>(data[ind], gradient_index+ind);\n      }\n      Index upper_offset(Index dim, Index offset, Index offdiag) const {\n\treturn offdiag*offset;\n      }\n      Index lower_offset(Index dim, Index offset, Index offdiag) const {\n\treturn -offdiag;\n      }\n      typedef SquareEngine<ROW_MAJOR> transpose_engine;\n      using SquareEngine<ROW_MAJOR>::data_size;\n      using SquareEngine<ROW_MAJOR>::check_upper_diag;\n      using SquareEngine<ROW_MAJOR>::check_lower_diag;\n      using SquareEngine<ROW_MAJOR>::set_extras;\n      using SquareEngine<ROW_MAJOR>::value_at_location;\n      using SquareEngine<ROW_MAJOR>::push_rhs;\n    };\n\n    // -------------------------------------------------------------------\n    // Band matrix storage engine\n    // -------------------------------------------------------------------\n\n    // A band matrix uses the BLAS packed storage to store LDiags\n    // subdiagonals and UDiags superdiagonals; the default version\n    // uses row-major storage\n    template <Index LDiags, Index UDiags>\n    struct BandEngineHelper {\n      const char* name() const { return \"BandMatrix\"; }\n    };\n    template <>\n    struct BandEngineHelper<0,0> {\n      const char* name() const { return \"DiagMatrix\"; }\n    };\n    template <>\n    struct BandEngineHelper<1,1> {\n      const char* name() const { return \"TridiagMatrix\"; }\n    };\n    template <>\n    struct BandEngineHelper<2,2> {\n      const char* name() const { return \"PentadiagMatrix\"; }\n    };\n\n    template <MatrixStorageOrder Order, Index LDiags, Index UDiags>\n    struct BandEngine {\n      static const int my_n_arrays = 3;\n      static const Index diagonals = 1+LDiags+UDiags;\n      const char* name() const { return BandEngineHelper<LDiags,UDiags>().name(); }\n      std::string long_name() const { 
\n\tstd::stringstream s;\n\ts << \"BandMatrix<ROW_MAJOR,LDiags=\" << LDiags\n\t  << \",UDiags=\" << UDiags << \">\";\n\treturn s.str();\n      }\n      Index pack_offset(Index dim) const { return diagonals-1; }\n      Index index(Index i, Index j, Index offset) const {\n\t//\treturn LDiags + i*offset + j;\n\treturn i*offset + j;\n      }\n      template <int MyArrayNum, int NArrays>\n      Index row_offset(Index offset, const ExpressionSize<NArrays>& loc) const {\n\treturn 1; \n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = i<LDiags ? 0 : i-LDiags;\n\tj_end_plus_1 = i+UDiags+1>dim ? dim : i+UDiags+1;\n\tindex_start = i*offset + j_start;\n\tindex_stride = 1;\n      }\n      typedef BandEngine<COL_MAJOR,UDiags,LDiags> transpose_engine;\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\tIndex off = j-i;\n\tType val;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  val = 0;\n\t}\n\telse {\n\t  val = data[index(i,j,offset)]; \n\t}\n\treturn val;\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\tIndex off = j-i;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  return Active<Type>(0.0);\n\t}\n\telse {\n\t  return Active<Type>(data[index(i,j,offset)]);\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex off = j-i;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  throw index_out_of_bounds(\"Attempt to 
get lvalue to off-diagonal in BandMatrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\telse {\n\t  return data[index(i,j,offset)]; \n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex off = j-i;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to off-diagonal in BandMatrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\telse {\n\t  Index ind = index(i,j,offset);\n\t  return ActiveReference<Type>(data[ind], gradient_index+ind);\n\t}\n      }\n      Index data_size(Index dim, Index offset) const {\n\treturn (dim-1)*(offset+1) + 1;// + dim; // - UDiags;\n      }\n\n      Index upper_offset(Index dim, Index offset, Index offdiag) const {\n\treturn offdiag;\n      }\n      Index lower_offset(Index dim, Index offset, Index offdiag) const {\n\treturn -offdiag*offset;\n      }\n      void check_upper_diag(Index offdiag) const {\n\tif (offdiag > UDiags) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue diagonal to off-diagonal in BandMatrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\t  \n\t}\n      }\n      void check_lower_diag(Index offdiag) const { \n\tif (-offdiag > LDiags) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue diagonal to off-diagonal in BandMatrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      template <int MyArrayNum, int NArrays>\n      void set_extras(Index i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const {\n\tindex[MyArrayNum+1] = i*(offset+1) - LDiags;\n\tindex[MyArrayNum+2] = index[MyArrayNum+1] + diagonals;\n      }\n      template <int MyArrayNum, int NArrays, typename Type>\n      Type value_at_location(const Type* data, \n\t\t\t     const ExpressionSize<NArrays>& loc) const {\n\tif (loc[MyArrayNum] >= loc[MyArrayNum+1]\n\t    && loc[MyArrayNum] < 
loc[MyArrayNum+2]) {\n\t  return data[loc[MyArrayNum]];\n\t}\n\telse {\n\t  return 0;\n\t}\n      }\n      template <int MyArrayNum, int NArrays, typename Type>\n      void push_rhs(Stack& stack, Type multiplier, Index gradient_index,\n\t\t    const ExpressionSize<NArrays>& loc) const {\n\tif (loc[MyArrayNum] >= loc[MyArrayNum+1]\n\t    && loc[MyArrayNum] < loc[MyArrayNum+2]) {\n\t  stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]);\n\t}\n      }\n    };\n\n    // The column-major version inherits from the row-major version in\n    // order that some functionality can be imported\n    template <Index LDiags, Index UDiags>\n    struct BandEngine<COL_MAJOR, LDiags, UDiags>\n      : public BandEngine<ROW_MAJOR, LDiags, UDiags> {\n      static const int my_n_arrays = 3;\n      static const Index diagonals = 1+LDiags+UDiags;\n      const char* name() const { return BandEngineHelper<LDiags,UDiags>().name(); }\n      std::string long_name() const { \n\tstd::stringstream s;\n\ts << \"BandMatrix<COL_MAJOR,LDiags=\" << LDiags\n\t  << \",UDiags=\" << UDiags << \">\";\n\treturn s.str();\n      }\n      using BandEngine<ROW_MAJOR,LDiags,UDiags>::pack_offset;\n      Index index(Index i, Index j, Index offset) const {\n\t//\treturn UDiags + i + j*offset;\n\treturn i + j*offset;\n      }\n      template <int MyArrayNum, int NArrays>\n      Index row_offset(Index offset, const ExpressionSize<NArrays>& loc) const {\n\treturn offset;\n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = i<LDiags ? 0 : i-LDiags;\n\tj_end_plus_1 = i+UDiags+1>dim ? 
dim : i+UDiags+1;\n\tindex_start = i + j_start*offset;\n\tindex_stride = offset;\n      }\n      typedef BandEngine<ROW_MAJOR,UDiags,LDiags> transpose_engine;\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\tIndex off = j-i;\n\tType val;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  val = 0;\n\t}\n\telse {\n\t  val = data[index(i,j,offset)]; \n\t}\n\treturn val;\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\tIndex off = j-i;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  return Active<Type>(0.0);\n\t}\n\telse {\n\t  return Active<Type>(data[index(i,j,offset)]);\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex off = j-i;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to off-diagonal in BandMatrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\telse {\n\t  return data[index(i,j,offset)]; \n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex off = j-i;\n\tif (off > UDiags || off < (-LDiags)) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to off-diagonal in BandMatrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n\telse {\n\t  Index ind = index(i,j,offset);\n\t  return ActiveReference<Type>(data[ind], gradient_index+ind);\n\t}\n      }\n   
   using BandEngine<ROW_MAJOR,LDiags,UDiags>::data_size;\n\n      Index upper_offset(Index dim, Index offset, Index offdiag) const {\n\t//\treturn LDiags + offdiag*offset;\n\treturn offdiag*offset;\n      }\n      Index lower_offset(Index dim, Index offset, Index offdiag) const {\n\t//\treturn LDiags - offdiag;\n\treturn -offdiag;\n      }\n      template <int MyArrayNum, int NArrays>\n      void set_extras(Index i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const {\n\tindex[MyArrayNum+1] = (i-LDiags)*(offset+1) + LDiags;\n\tindex[MyArrayNum+2] = index[MyArrayNum+1] + (diagonals-1)*offset+1;\n      }\n      using BandEngine<ROW_MAJOR,LDiags,UDiags>::check_upper_diag;\n      using BandEngine<ROW_MAJOR,LDiags,UDiags>::check_lower_diag;\n      using BandEngine<ROW_MAJOR,LDiags,UDiags>::value_at_location;\n      using BandEngine<ROW_MAJOR,LDiags,UDiags>::push_rhs;\n    };\n\n    // -------------------------------------------------------------------\n    // Symmetric matrix storage engine\n    // -------------------------------------------------------------------\n\n    // A symmetric matrix - the default version (template parameter\n    // ROW_LOWER_COL_UPPER) should be considered to use row-major\n    // storage with the data held on the lower triangle of the\n    // matrix. 
This is equivalent to column-major upper-triangle\n    // storage for most uses, except that when this kind of symmetric\n    // matrix is used on the left-hand-side of a statement, it will\n    // only read the lower triangle of the right-hand-side of the\n    // statement (assuming the upper triangle to be a symmetric copy).\n    template <SymmMatrixOrientation Orient>\n    struct SymmEngine : public SquareEngine<ROW_MAJOR> {\n      static const int my_n_arrays = 2;\n      const char* name() const { return \"SymmMatrix\"; }\n      std::string long_name() const {\n\treturn \"SymmMatrix<ROW_LOWER_COL_UPPER>\";\n      }\n      Index index(Index i, Index j, Index offset) const {\n\treturn i >= j ? i*offset + j : i + j*offset;\n      }\n      template <int MyArrayNum, int NArrays>\n      Index row_offset(Index offset, const ExpressionSize<NArrays>& loc) const {\n\treturn loc[MyArrayNum] < loc[MyArrayNum+1] ? 1 : offset; \n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = 0;\n\tj_end_plus_1 = i+1;\n\tindex_start = i*offset;\n\tindex_stride = 1;\n      }\n      typedef SymmEngine<ROW_LOWER_COL_UPPER> transpose_engine;\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\treturn data[index(i,j,offset)]; \n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\treturn Active<Type>(data[index(i,j,offset)]);\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    
Index gradient_index, Type* data) {\n\treturn data[index(i,j,offset)]; \n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex ind = index(i,j,offset);\n\treturn ActiveReference<Type>(data[ind], gradient_index+ind);\n      }\n      template <int MyArrayNum, int NArrays>\n      void set_extras(Index i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const {\n\tindex[MyArrayNum+1] = i*(offset+1);\n      }\n      Index upper_offset(Index dim, Index offset, Index offdiag) const {\n\treturn offdiag*offset;\n      }\n      Index lower_offset(Index dim, Index offset, Index offdiag) const {\n\treturn -offdiag*offset;\n      }\n\n      using SquareEngine<ROW_MAJOR>::pack_offset;\n      using SquareEngine<ROW_MAJOR>::data_size;\n      using SquareEngine<ROW_MAJOR>::check_upper_diag;\n      using SquareEngine<ROW_MAJOR>::check_lower_diag;\n      using SquareEngine<ROW_MAJOR>::value_at_location;\n      using SquareEngine<ROW_MAJOR>::push_rhs;\n    };\n\n    // A symmetric matrix whose storage can be considered to be\n    // row-major with the data stored on the upper triangle. This is\n    // equivalent to column-major lower-triangular storage, except\n    // that when this kind of symmetric matrix is on the LHS of a\n    // statement, it will only read the upper triangle of the RHS of\n    // the statement.\n    template <>\n    struct SymmEngine<ROW_UPPER_COL_LOWER> : public SquareEngine<ROW_MAJOR> {\n      static const int my_n_arrays = 2;\n      const char* name() const { return \"SymmMatrix\"; }\n      std::string long_name() const { \n\treturn \"SymmMatrix<ROW_UPPER_COL_LOWER>\";\n      }\n      Index pack_offset(Index dim) const { return dim; }\n      Index index(Index i, Index j, Index offset) const {\n\treturn i <= j ? 
i*offset + j : i + j*offset;\n      }\n      template <int MyArrayNum, int NArrays>\n      Index row_offset(Index offset, const ExpressionSize<NArrays>& loc) const {\n\treturn loc[MyArrayNum] < loc[MyArrayNum+1] ? offset : 1; \n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = i;\n\tj_end_plus_1 = dim;\n\tindex_start = i*(1+offset);\n\tindex_stride = 1;\n      }\n      typedef SymmEngine<ROW_UPPER_COL_LOWER> transpose_engine;\n      Index upper_offset(Index dim, Index offset, Index offdiag) const {\n\treturn offdiag;\n      }\n      Index lower_offset(Index dim, Index offset, Index offdiag) const {\n\treturn -offdiag;\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\treturn data[index(i,j,offset)]; \n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\treturn Active<Type>(data[index(i,j,offset)]);\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\treturn data[index(i,j,offset)]; \n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tIndex ind = index(i,j,offset);\n\treturn ActiveReference<Type>(data[ind], gradient_index+ind);\n      }\n      template <int MyArrayNum, int NArrays>\n      void set_extras(Index 
i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const {\n\tindex[MyArrayNum+1] = i*(offset+1);\n      }\n\n      using SquareEngine<ROW_MAJOR>::data_size;\n      using SquareEngine<ROW_MAJOR>::check_upper_diag;\n      using SquareEngine<ROW_MAJOR>::check_lower_diag;\n      using SquareEngine<ROW_MAJOR>::value_at_location;\n      using SquareEngine<ROW_MAJOR>::push_rhs;\n    };\n\n    /*\n    // -------------------------------------------------------------------\n    // Symmetric band matrix storage engine\n    // -------------------------------------------------------------------\n    */\n\n    // -------------------------------------------------------------------\n    // Triangular matrix storage engines\n    // -------------------------------------------------------------------\n\n    // Forward declaration\n    template <MatrixStorageOrder Order> struct UpperEngine;\n\n    // Base class for common functions for row-major and column-major\n    // storage\n    template <MatrixStorageOrder Order>\n    struct LowerBase : public SquareEngine<Order> {\n      static const int my_n_arrays = 2;\n\n      using SquareEngine<Order>::pack_offset;\n      using SquareEngine<Order>::data_size;\n      using SquareEngine<Order>::index;\n      using SquareEngine<Order>::row_offset;\n      using SquareEngine<Order>::check_lower_diag;\n      using SquareEngine<Order>::upper_offset;\n      using SquareEngine<Order>::lower_offset;\n\n      const char* name() const { return \"LowerMatrix\"; }\n      template <int MyArrayNum, int NArrays>\n      void set_extras(Index i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const {\n\tindex[MyArrayNum+1] = i*(offset+1);\n      }\n      void check_upper_diag(Index offdiag) const {\n\tif (offdiag > 0) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to an upper diagonal of a lower-triangular matrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\t  \n\t}\n      }\n      template <bool IsActive, typename Type>\n      
typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\tif (i >= j) {\n\t  return data[index(i,j,offset)]; \n\t}\n\telse {\n\t  return 0;\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\tif (i >= j) {\n\t  return Active<Type>(data[index(i,j,offset)]);\n\t}\n\telse {\n\t  return Active<Type>(0.0);\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tif (i >= j) {\n\t  return data[index(i,j,offset)]; \n\t}\n\telse {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to upper part of lower-triangular matrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tif (i >= j) {\n\t  Index ind = index(i,j,offset);\n\t  return ActiveReference<Type>(data[ind], gradient_index+ind);\n\t}\n\telse {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to upper part of lower-triangular matrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t  \n\t}\n      }\n      template <int MyArrayNum, int NArrays, typename Type>\n      Type value_at_location(const Type* data, \n\t\t\t     const ExpressionSize<NArrays>& loc) const {\n\tif (loc[MyArrayNum] <= loc[MyArrayNum+1]) {\n\t  return data[loc[MyArrayNum]];\n\t}\n\telse {\n\t  return 0;\n\t}\n      }\n      template <int MyArrayNum, int NArrays, typename Type>\n      void push_rhs(Stack& stack, Type multiplier, Index 
gradient_index,\n\t\t    const ExpressionSize<NArrays>& loc) const {\n\tif (loc[MyArrayNum] <= loc[MyArrayNum+1]) {\n\t  stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]);\n\t}\n      }\n    };\n\n    // Lower-triangular matrix using row-major storage\n    template <MatrixStorageOrder Order>\n    struct LowerEngine : public LowerBase<ROW_MAJOR> {\n      std::string long_name() const {\n\treturn \"LowerMatrix<ROW_MAJOR>\";\n      }\n      typedef UpperEngine<COL_MAJOR> transpose_engine;\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = 0;\n\tj_end_plus_1 = i+1;\n\tindex_start = i*offset;\n\tindex_stride = 1;\n      }\n    };\n\n    // Lower-triangular matrix using column-major storage\n    template <>\n    struct LowerEngine<COL_MAJOR> : public LowerBase<COL_MAJOR> {\n      std::string long_name() const {\n\treturn \"LowerMatrix<COL_MAJOR>\";\n      }\n      typedef UpperEngine<ROW_MAJOR> transpose_engine;\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = 0;\n\tj_end_plus_1 = i+1;\n\tindex_start = i;\n\tindex_stride = offset;\n      }\n    };\n\n    // Base class for common functions for row-major and column-major\n    // storage\n    template <MatrixStorageOrder Order>\n    struct UpperBase : public SquareEngine<Order> {\n      static const int my_n_arrays = 2;\n\n      using SquareEngine<Order>::pack_offset;\n      using SquareEngine<Order>::data_size;\n      using SquareEngine<Order>::index;\n      using SquareEngine<Order>::row_offset;\n      using SquareEngine<Order>::check_lower_diag;\n      using SquareEngine<Order>::upper_offset;\n      using SquareEngine<Order>::lower_offset;\n\n      const char* name() const { return \"UpperMatrix\"; }\n      template <int MyArrayNum, int NArrays>\n      void 
set_extras(Index i, Index offset,\n\t\t      ExpressionSize<NArrays>& index) const {\n\tindex[MyArrayNum+1] = i*(offset+1);\n      }\n      void check_lower_diag(Index offdiag) const {\n\tif (offdiag < 0) {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to a lower diagonal of an upper-triangular matrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\t  \n\t}\n      }\n\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type>::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t Index gradient_index, const Type* data) const {\n\tif (i <= j) {\n\t  return data[index(i,j,offset)]; \n\t}\n\telse {\n\t  return 0;\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,Active<Type> >::type\n      get_scalar(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, const Type* data) const {\n\tif (i <= j) {\n\t  return Active<Type>(data[index(i,j,offset)]);\n\t}\n\telse {\n\t  return Active<Type>(0.0);\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<!IsActive,Type&>::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tif (i <= j) {\n\t  return data[index(i,j,offset)]; \n\t}\n\telse {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to lower part of upper-triangular matrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t}\n      }\n      template <bool IsActive, typename Type>\n      typename internal::enable_if<IsActive,ActiveReference<Type> >::type\n      get_reference(Index i, Index j, Index dim, Index offset, \n\t\t    Index gradient_index, Type* data) {\n\tif (i <= j) {\n\t  Index ind = index(i,j,offset);\n\t  return ActiveReference<Type>(data[ind], gradient_index+ind);\n\t}\n\telse {\n\t  throw index_out_of_bounds(\"Attempt to get lvalue to lower part of upper-triangular matrix\"\n\t\t\t\t    ADEPT_EXCEPTION_LOCATION);\n\t 
 \n\t}\n      }\n      template <int MyArrayNum, int NArrays, typename Type>\n      Type value_at_location(const Type* data, \n\t\t\t     const ExpressionSize<NArrays>& loc) const {\n\tif (loc[MyArrayNum] >= loc[MyArrayNum+1]) {\n\t  return data[loc[MyArrayNum]];\n\t}\n\telse {\n\t  return 0;\n\t}\n      }\n      template <int MyArrayNum, int NArrays, typename Type>\n      void push_rhs(Stack& stack, Type multiplier, Index gradient_index,\n\t\t    const ExpressionSize<NArrays>& loc) const {\n\tif (loc[MyArrayNum] >= loc[MyArrayNum+1]) {\n\t  stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]);\n\t}\n      }\n    };\n\n    // Upper-triangular matrix using row-major storage\n    template <MatrixStorageOrder Order>\n    struct UpperEngine : public UpperBase<ROW_MAJOR> {\n      typedef LowerEngine<COL_MAJOR> transpose_engine;\n\n      std::string long_name() const {\n\treturn \"UpperMatrix<ROW_MAJOR>\";\n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = i;\n\tj_end_plus_1 = dim;\n\tindex_start = i*(offset+1);\n\tindex_stride = 1;\n      }\n    };\n\n    // Upper-triangular matrix using column-major storage\n    template <>\n    struct UpperEngine<COL_MAJOR> : public UpperBase<COL_MAJOR> {\n      typedef LowerEngine<ROW_MAJOR> transpose_engine;\n      std::string long_name() const {\n\treturn \"UpperMatrix<COL_MAJOR>\";\n      }\n      void get_row_range(Index i, Index dim, Index offset,\n\t\t\t Index& j_start, Index& j_end_plus_1,\n\t\t\t Index& index_start, Index& index_stride) const {\n\tj_start = i;\n\tj_end_plus_1 = dim;\n\tindex_start = i*(offset+1);\n\tindex_stride = offset;\n      }\n    };\n\n  } // End namespace internal\n\n  // -------------------------------------------------------------------\n  // Definition of SpecialMatrix class\n  // -------------------------------------------------------------------\n  template 
<typename Type = Real, class Engine = internal::SquareEngine<ROW_MAJOR>,\n    bool IsActive = false>\n  class SpecialMatrix \n    : public Expression<Type,SpecialMatrix<Type,Engine,IsActive> >,\n      protected Engine,\n      protected internal::GradientIndex<IsActive> {\n  public:\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 1. Static Definitions\n    // -------------------------------------------------------------------\n\n    // Static definitions to enable the properties of this type of\n    // expression to be discerned at compile time\n    static const bool is_active  = IsActive;\n    static const bool is_lvalue  = true;\n    static const int  rank       = 2;\n    static const int  n_active   = IsActive * (1 + internal::is_complex<Type>::value);\n    static const int  n_scratch  = 0;\n    static const int  n_arrays   = Engine::my_n_arrays;\n    static const bool is_vectorizable = false;\n\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 2. 
Constructors\n    // -------------------------------------------------------------------\n    \n    // Initialize an empty array\n    SpecialMatrix() : data_(0), storage_(0), dimension_(0)\n    { ADEPT_STATIC_ASSERT(!(std::numeric_limits<Type>::is_integer\n\t\t\t    && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS); }\n\n    // Initialize an array with specified size\n    SpecialMatrix(const ExpressionSize<2>& dims) : storage_(0)\n    { resize(dims[0], dims[1]); }\n    SpecialMatrix(Index m0) : storage_(0) { resize(m0); }\n    SpecialMatrix(Index m0, Index m1) : storage_(0) { resize(m0,m1); }\n\n    // A way to directly create arrays, needed when subsetting\n    // other arrays\n    SpecialMatrix(Type* data, Storage<Type>* s, Index dim, Index offset)\n      : data_(data), storage_(s), dimension_(dim), offset_(offset) {\n      if (storage_) {\n\tstorage_->add_link(); \n\tinternal::GradientIndex<IsActive>::set(data_, storage_);\n      }\n      else {\n\t// It is an error if an active object gets here since it will\n\t// not have a valid gradient index\n\tinternal::GradientIndex<IsActive>::assert_inactive();\n      }\n    }\n    // Similar to the above, but with the gradient index supplied explicitly,\n    // needed when an active FixedArray is being sliced\n    SpecialMatrix(const Type* data0, Index data_offset, Index dim, Index offset,\n\t\t  Index gradient_index0)\n      : internal::GradientIndex<IsActive>(gradient_index0, data_offset),\n\tdata_(const_cast<Type*>(data0)+data_offset), storage_(0), dimension_(dim), offset_(offset) { }\n\n\n    // Initialize an array pointing at existing data: the fact that\n    // storage_ is a null pointer is used to convey the information\n    // that it is not necessary to deallocate the data when this array\n    // is destructed\n    SpecialMatrix(Type* data, Index dim)\n      : data_(data), storage_(0), dimension_(dim), \n\toffset_(Engine::pack_offset(dim)) {\n      ADEPT_STATIC_ASSERT(!IsActive, 
CANNOT_CONSTRUCT_ACTIVE_SQUARE_ARRAY_WITHOUT_GRADIENT_INDEX);\n    }\n\n    // Copy constructor: links to the source data rather than copying\n    // it.  This is needed because we want a function returning an\n    // SpecialMatrix not to make a deep copy, but rather to perform a\n    // (computationally cheaper) shallow copy; when the SpecialMatrix within\n    // the function is destructed, it will remove its link to the\n    // data, and the responsibility for deallocating the data will\n    // then pass to the SpecialMatrix in the calling function.\n    SpecialMatrix(SpecialMatrix& rhs) \n      : internal::GradientIndex<IsActive>(rhs.gradient_index()),\n        data_(rhs.data()), storage_(rhs.storage()), \n\tdimension_(rhs.dimension()), offset_(rhs.offset()) \n    { if (storage_) storage_->add_link(); }\n\n    // Copy constructor with const argument does exactly the same\n    // thing\n    SpecialMatrix(const SpecialMatrix& rhs) \n      : internal::GradientIndex<IsActive>(rhs.gradient_index()),\n        dimension_(rhs.dimension()), offset_(rhs.offset())\n    { link_(const_cast<SpecialMatrix&>(rhs)); }\n  private:\n    void link_(SpecialMatrix& rhs) {\n      data_ = const_cast<Type*>(rhs.data()); \n      storage_ = const_cast<Storage<Type>*>(rhs.storage());\n      if (storage_) storage_->add_link();\n    }\n\n  public:\n    // Initialize with an expression on the right hand side by\n    // evaluating the expression, requiring the ranks to be equal.\n    // Note that this constructor enables expressions to be used as\n    // arguments to functions that expect an array - to prevent this\n    // implicit conversion, use the \"explicit\" keyword.\n    template<typename EType, class E>\n    explicit\n    SpecialMatrix(const Expression<EType, E>& rhs,\n\t  typename internal::enable_if<E::rank == 2,int>::type = 0)\n      : data_(0), storage_(0), dimension_(0)\n    { *this = rhs; }\n\n    // Destructor: if the data are stored in a Storage object then we\n    // tell it 
that one fewer object is linking to it; if the number\n    // of links to it drops to zero, it will destruct itself and\n    // deallocate the memory.\n    ~SpecialMatrix()\n    { if (storage_) storage_->remove_link(); }\n\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 3. Assignment operators\n    // -------------------------------------------------------------------\n\n    // Assignment to another matrix: copy the data...\n    // Ideally we would like this to fall back to the operator=(const\n    // Expression&) function, but if we don't define a copy assignment\n    // operator then C++ will generate a default one :-(\n    SpecialMatrix& operator=(const SpecialMatrix& rhs) {\n      *this = static_cast<const Expression<Type,SpecialMatrix>&> (rhs);\n      return *this;\n    }\n\n    // Assignment to an array expression of the same rank\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == 2, SpecialMatrix&>::type\n    operator=(const Expression<EType,E>& rhs) {\n#ifndef ADEPT_NO_DIMENSION_CHECKING\n      ExpressionSize<2> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (empty()) {\n\tresize(dims[0], dims[1]);\n      }\n      else if (!internal::compatible(dims, dimensions())) {\n\tstd::string str = \"Expr\";\n\tstr += dims.str() + \" object assigned to \" + expression_string_();\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n#else\n      if (empty()) {\n\tExpressionSize<2> dims;\n\tif (!rhs.get_dimensions(dims)) {\n\t  std::string str = \"Array size mismatch in \"\n\t    + rhs.expression_string() + \".\";\n\t  throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n\t}\n\tresize(dims[0], dims[1]);\n      }\n#endif\n      if (!empty()) {\n#ifndef ADEPT_NO_ALIAS_CHECKING\n\t// Check for aliasing first\n\tType 
const * ptr_begin;\n\tType const * ptr_end;\n\tdata_range(ptr_begin, ptr_end);\n\tif (rhs.is_aliased(ptr_begin, ptr_end)) {\n\t  SpecialMatrix copy;\n\t  // It would be nice to wrap noalias around rhs, but then\n\t  // this leads to infinite template recursion since the \"=\"\n\t  // operator calls the current function but with a modified\n\t  // expression type. perhaps a better way would be to make\n\t  // copy.assign_no_alias(rhs) work.\n\t  copy = rhs;\n\t  assign_expression_<IsActive, E::is_active>(copy);\n\t}\n\telse {\n#endif\n\t  // Select active/passive version by delegating to a\n\t  // protected function\n\t  assign_expression_<IsActive, E::is_active>(rhs);\n#ifndef ADEPT_NO_ALIAS_CHECKING\n\t}\n#endif\n      }\n      return *this;\n    }\n    \n    // Assignment to an array expression of the same rank in which the\n    // activeness of the right-hand-side is ignored\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == 2, SpecialMatrix&>::type\n    assign_inactive(const Expression<EType,E>& rhs) {\n      ExpressionSize<2> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (empty()) {\n\tresize(dims[0], dims[1]);\n      }\n      else if (!internal::compatible(dims, dimensions())) {\n\tstd::string str = \"Expr\";\n\tstr += dims.str() + \" object assigned to \" + expression_string_();\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n\n      if (!empty()) {\n\t// Check for aliasing first\n\tType const * ptr_begin;\n\tType const * ptr_end;\n\tdata_range(ptr_begin, ptr_end);\n\tif (rhs.is_aliased(ptr_begin, ptr_end)) {\n\t  std::cout << \"ALIASED!\\n\";\n\t  SpecialMatrix copy;\n\t  copy.assign_inactive(rhs);\n\t  //\t  *this = copy;\n\t  assign_expression_<IsActive, false>(copy);\n\t}\n\telse {\n\t  assign_expression_<IsActive, false>(rhs);\n\t}\n      
}\n      return *this;\n    }\n\n    // Assignment to a single value copies to every element\n    template <typename RType>\n    typename internal::enable_if<internal::is_not_expression<RType>::value, SpecialMatrix&>::type\n    operator=(RType rhs) {\n      if (!empty()) {\n\tassign_inactive_scalar<IsActive>(rhs);\n      }\n      return *this;\n    }\n\n    // Assign active scalar expression to an active array by first\n    // converting the RHS to an active scalar\n    template <typename EType, class E>\n    typename internal::enable_if<E::rank == 0 && IsActive && !E::is_lvalue,\n      SpecialMatrix&>::type\n      operator=(const Expression<EType,E>& rhs) {\n      Active<EType> x = rhs;\n      *this = x;\n      return *this;\n    }\n\n  \n    // An active array being assigned to an active scalar\n    template <typename PType>\n    typename internal::enable_if<!internal::is_active<PType>::value && IsActive, SpecialMatrix&>::type\n    operator=(const Active<PType>& rhs) {\n      // If not recording we call the inactive version instead\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (! ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_inactive_scalar<false>(rhs.scalar_value());\n\treturn *this;\n      }\n#endif\n      Type val = rhs.scalar_value();\n      Index j_start, j_end_plus_1, index, index_stride;\n      for (Index i = 0 ; i < dimension_; ++i) {\n\tEngine::get_row_range(i, dimension_, offset_, \n\t\t\t      j_start, j_end_plus_1, index, index_stride);\n\tfor (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) {\n\t  data_[index] = val;\n\t  ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index());\n\t  ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index);\t  \n\t}\n      }\n      return *this;\n    }\n\n\n\n\n    // All the compound assignment operators are unpacked, i.e. a+=b\n    // becomes a=a+b; first for an Expression on the rhs.  
We use\n    // \"noalias\" sine there is no need for the entirety of the\n    // right-hand-side of the expression to be copied before\n    // evaluation.\n    template<typename EType, class E>\n    SpecialMatrix& operator+=(const Expression<EType,E>& rhs) {\n      return *this = (noalias(*this) + rhs);\n    }\n    template<typename EType, class E>\n    SpecialMatrix& operator-=(const Expression<EType,E>& rhs) {\n      return *this = (noalias(*this) - rhs);\n    }\n    template<typename EType, class E>\n    SpecialMatrix& operator*=(const Expression<EType,E>& rhs) {\n      return *this = (noalias(*this) * rhs);\n    }\n    template<typename EType, class E>\n    SpecialMatrix& operator/=(const Expression<EType,E>& rhs) {\n      return *this = (noalias(*this) / rhs);\n    }\n\n    // And likewise for a passive scalar on the rhs\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, SpecialMatrix&>::type\n    operator+=(const PType& rhs) {\n      return *this = (noalias(*this) + rhs);\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, SpecialMatrix&>::type\n    operator-=(const PType& rhs) {\n      return *this = (noalias(*this) - rhs);\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, SpecialMatrix&>::type\n    operator*=(const PType& rhs) {\n      return *this = (noalias(*this) * rhs);\n    }\n    template <typename PType>\n    typename internal::enable_if<internal::is_not_expression<PType>::value, SpecialMatrix&>::type\n    operator/=(const PType& rhs) {\n      return *this = (noalias(*this) / rhs);\n    }\n\n  \n    // -------------------------------------------------------------------\n    // SpecialMatrix: 4. 
Access functions, particularly operator()\n    // -------------------------------------------------------------------\n  \n    // Get l-value of the element at the specified coordinates\n    typename internal::active_reference<Type,IsActive>::type\n    get_lvalue(const ExpressionSize<2>& i) {\n      return get_lvalue_<IsActive>(Engine::index(i[0],i[1],offset_));\n    }\n    \n  protected:\n    template <bool MyIsActive>\n    typename internal::enable_if<MyIsActive, ActiveReference<Type> >::type\n    get_lvalue_(const Index& loc) {\n      return ActiveReference<Type>(data_[loc], gradient_index()+loc);\n    }\n    template <bool MyIsActive>\n    typename internal::enable_if<!MyIsActive, Type&>::type\n    get_lvalue_(const Index& loc) {\n      return data_[loc];\n    }\n\n  public:\n    // Access individual elements of the array.  Each argument must be\n    // of integer type, or a rank-0 expression of integer type (such\n    // as \"end\" or \"end-3\"). Inactive arrays return a reference to the\n    // element, while active arrays return an ActiveReference<Type>\n    // object.\n    template <typename I0, typename I1>\n    typename internal::enable_if<internal::all_scalar_ints<2,I0,I1>::value,\n\t\t       typename internal::active_reference<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1) {\n      return Engine::template \n\tget_reference<IsActive>(internal::get_index_with_len(i0,dimension_),\n\t\t\t\tinternal::get_index_with_len(i1,dimension_),\n\t\t\t\tdimension_, offset_, \n\t\t\t\tgradient_index(), data_);\n    }\n    template <typename I0, typename I1>\n    typename internal::enable_if<internal::all_scalar_ints<2,I0,I1>::value,\n\t\t\t\t typename internal::active_scalar<Type,IsActive>::type>::type\n    operator()(I0 i0, I1 i1) const {\n      return Engine::template get_scalar<IsActive>(internal::get_index_with_len(i0,dimension_),\n\t\t\t\t\t\t   internal::get_index_with_len(i1,dimension_),\n\t\t\t\t\t\t   dimension_, offset_, \n\t\t\t\t\t\t   
gradient_index(), data_);\n    }\n    \n    /*\n    // If one or more of the indices is not guaranteed to be monotonic\n    // at compile time then we must return an IndexedSpecialMatrix, now done\n    // for all possible numbers of arguments\n  \n    template <typename I0, typename I1>\n    typename internal::enable_if<internal::is_indexed<Rank,I0,I1>::value\n                       && !internal::is_ranged<Rank,I0,I1>::value,\n\t\t       IndexedSpecialMatrix<internal::is_indexed<Rank,I0,I1>::count,\n\t\t\t\t    Type,IsActive,SpecialMatrix,I0,I1> >::type\n    operator()(const I0& i0, const I1& i1) {\n      static const int new_rank = internal::is_indexed<Rank,I0,I1>::count;\n      return IndexedSpecialMatrix<new_rank,Type,IsActive,SpecialMatrix,I0,I1>(*this, i0, i1);\n    }\n    */\n\n    // diag_vector(offdiag), where A is a 2D square band matrix (including\n    // DiagMatrix, TridiagMatrix etc), returns a 1D array pointing to\n    // the \"offdiag\"-th diagonal of the original data, Can be used as an\n    // lvalue.\n    Array<1,Type,IsActive>\n    diag_vector(Index offdiag = 0) {\n      if (offdiag >= 0) {\n\tEngine::check_upper_diag(offdiag);\n\tExpressionSize<1> dim(dimension_ - offdiag);\n\tExpressionSize<1> offset(offset_+1);\n\treturn Array<1,Type,IsActive>(data_\n\t      +Engine::upper_offset(dimension_,offset_,offdiag),\n\t\t\t\t    storage_, dim, offset);\n      }\n      else {\n\tEngine::check_lower_diag(offdiag);\n\tExpressionSize<1> dim(dimension_ + offdiag);\n\tExpressionSize<1> offset(offset_+1);\n\treturn Array<1,Type,IsActive>(data_\n\t      +Engine::lower_offset(dimension_,offset_,offdiag),\n\t\t\t\t      storage_, dim, offset);\n      }\n    }\n\n    // Extract a square sub-matrix on the diagonal\n    SpecialMatrix\n    submatrix_on_diagonal(Index istart, Index iend) {\n      if (istart < 0 || istart > iend || iend >= dimension_) {\n\tthrow index_out_of_bounds(\"Dimensions out of range in submatrix_on_diagonal\"\n\t\t\t\t  
ADEPT_EXCEPTION_LOCATION);\n      }\n      return SpecialMatrix(data_+(offset_+1)*istart, \n\t\t\t  storage_, iend-istart+1, offset_);\n    }\n\n    // FIX - add an rvalue version returning const Array (?)\n\n    // Transpose as an lvalue\n    SpecialMatrix<Type, typename Engine::transpose_engine, IsActive>\n    T() {\n      return SpecialMatrix<Type, typename Engine::transpose_engine, \n\tIsActive>(data_, storage_, dimension_, offset_);\n    }\n\n    // Return a SpecialMatrix that is a \"soft\" link to the data in the\n    // present array; that is, it does not copy the Storage object and\n    // increase the reference counter therein. This is useful in a\n    // multi-threaded environment when multiple threads may wish to\n    // subset the same array.\n    SpecialMatrix soft_link() {\n      return SpecialMatrix(data_,0,dimension_,offset_,gradient_index());\n    }\n    const SpecialMatrix soft_link() const {\n      return SpecialMatrix(data_,0,dimension_,offset_,gradient_index());\n    }\n    \n\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 5. Public member functions\n    // -------------------------------------------------------------------\n  \n    // Link to an existing array of the same rank, type and activeness\n    SpecialMatrix& link(SpecialMatrix& rhs) {\n      if (!rhs.data()) {\n\tthrow empty_array(\"Attempt to link to empty array\"\n\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\tclear();\n\tdata_ = rhs.data();\n\tstorage_ = rhs.storage();\n\tdimension_ = rhs.dimension();\n\toffset_ = rhs.offset();\n\tif (storage_) {\n\t  storage_->add_link();\n\t}\n      }\n      return *this;\n    }\n   \n\n#ifndef ADEPT_MOVE_SEMANTICS\n    // A common pattern is to link to a subset of another\n    // SpecialMatrix, e.g. vec1.link(vec2(range(2,4))), but the\n    // problem is that the argument to link is a temporary so will not\n    // bind to SpecialMatrix&. 
In C++98 we therefore need a function\n    // taking const SpecialMatrix& and then cast away the const-ness. This has\n    // the unfortunate side effect that a non-const SpecialMatrix can be\n    // linked to a const SpecialMatrix.\n    SpecialMatrix& link(const SpecialMatrix& rhs) { \n      return link(const_cast<SpecialMatrix&>(rhs)); \n    }\n#else\n    // But in C++11 we can solve this problem and only bind to\n    // temporary non-const SpecialMatrix\n    SpecialMatrix& link(SpecialMatrix&& rhs) {\n      return link(const_cast<SpecialMatrix&>(rhs));\n    }\n#endif\n\n    // Fortran-like link syntax A >>= B\n    SpecialMatrix& operator>>=(SpecialMatrix& rhs)\n    { return link(rhs); }\n#ifndef ADEPT_MOVE_SEMANTICS\n    SpecialMatrix& operator>>=(const SpecialMatrix& rhs)\n    { return link(const_cast<SpecialMatrix&>(rhs)); }\n#else\n    SpecialMatrix& operator>>=(SpecialMatrix&& rhs)\n    { return link(const_cast<SpecialMatrix&>(rhs)); }\n#endif\n\n    // STL-like size() returns total length of array\n    Index size() const {\n      return dimension_*dimension_;\n    }\n\n    // Return dimensions\n    ExpressionSize<2> dimensions() const {\n      return ExpressionSize<2>(dimension_,dimension_);\n    }\n\n    bool get_dimensions_(ExpressionSize<2>& dim) const {\n      dim[0] = dim[1] = dimension_;\n      return true;\n    }\n\n    // Return individual dimension\n    Index dimension(int j = 0) const {\n      return dimension_;\n    }\n\n    \n    // Return individual offset\n    Index offset() const {\n      return offset_;\n    }\n    \n\n  /*\n    // Get dimensions for matrix operations, treating 1D arrays as\n    // column vectors\n    void get_matrix_dimensions(ExpressionSize<2>& dim) const {\n      dim[0] = dim[1] = dimension_;\n    }\n  */\n\n    /*\n    // Return constant reference to offsets\n    const ExpressionSize<Rank>& offset() const {\n      return offset_;\n    }\n    const Index& last_offset() const { return offset_[Rank-1]; }\n    */\n\n    // 
Return true if the array is empty\n    bool empty() const { return (dimension_ == 0); }\n\n    // Return a string describing the array\n    std::string info_string() const {\n      std::stringstream str;\n      str << Engine::long_name() << \", dim=\" << dimension_ \n\t  << \", offset=\" << offset_ << \", data_location=\" << data_;\n      return str.str();\n    }\n\n    // Return a pointer to the start of the data\n    Type* data() { return data_; }\n    const Type* data() const { return data_; }\n    const Type* const_data() const { return data_; }\n\n    // Older style\n    Type* data_pointer() { return data_; }\n    const Type* data_pointer() const { return data_; }\n    const Type* const_data_pointer() const { return data_; }\n\n    // Return a pointer to the storage object\n    Storage<Type>* storage() { return storage_; }\n\n    // Reset the array to its original empty state, removing the link\n    // to the data (which may deallocate the data if it was the only\n    // link) and set the dimensions to zero\n    void clear() {\n      if (storage_) {\n\tstorage_->remove_link();\n\tstorage_ = 0;\n      }\n      data_ = 0;\n      dimension_ = 0;\n      offset_ = 0;\n      internal::GradientIndex<IsActive>::clear();\n    }\n\n    // Resize an array\n    void resize(Index dim) {\n\n      ADEPT_STATIC_ASSERT(!(std::numeric_limits<Type>::is_integer\n\t    && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS);\n\n      if (storage_) {\n\tstorage_->remove_link();\n\tstorage_ = 0;\n      }\n      // Check requested dimensions\n      if (dim < 0) {\n\tthrow invalid_dimension(\"Negative array dimension requested\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dim == 0) {\n\tclear();\n      }\n      else {\n\tdimension_ = dim;\n\toffset_ = Engine::pack_offset(dim);\n\tstorage_ = new Storage<Type>(Engine::data_size(dimension_,offset_), IsActive);\n\tdata_ = storage_->data();\n\tinternal::GradientIndex<IsActive>::set(data_, storage_);\n      }\n    }\n\n    // 
Resize with an ExpressionSize object\n    void resize(Index dim0, Index dim1) {\n      if (dim0 != dim1) {\n\tthrow invalid_dimension(\"Square matrix must have the same x and y dimensions\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      resize(dim0);\n    }\n\n    bool is_aliased_(const Type* mem1, const Type* mem2) const {\n      Type const * ptr_begin;\n      Type const * ptr_end;\n      data_range(ptr_begin, ptr_end);\n      if (ptr_begin <= mem2 && ptr_end >= mem1) {\n\treturn true;\n      }\n      else {\n\treturn false;\n      }\n    }\n  \n    // Cannot traverse a full row just by incrementing an index by 1\n    bool all_arrays_contiguous_() const { return false; }\n\n    Type value_with_len_(const Index& j, const Index& len) const {\n      ADEPT_STATIC_ASSERT(false, CANNOT_USE_VALUE_WITH_LEN_ON_ARRAY_OF_RANK_OTHER_THAN_1);\n      return 0;\n    }\n\n    std::string expression_string_() const {\n      std::stringstream a;\n      a << Engine::name()\n\t<< \"[\" << dimension_ << \",\" << dimension_ << \"]\";\n      return a.str();\n    }\n\n    // The same as operator=(inactive scalar) but does not put\n    // anything on the stack\n    template <typename RType>\n    typename internal::enable_if<internal::is_not_expression<RType>::value, SpecialMatrix&>::type\n    set_value(RType x) {\n      if (!empty()) {\n\tassign_inactive_scalar<false>(x);\n      }\n      return *this;\n    }\n  \n    // Is the array contiguous in memory?\n    bool is_contiguous() const {\n      return (offset_ == Engine::pack_offset(dimension_));\n    }\n  \n    // Return the gradient index for the first element in the array,\n    // or -1 if not active\n    Index gradient_index() const {\n      return internal::GradientIndex<IsActive>::get();\n    }\n\n    /*\n    std::ostream& print(std::ostream& os) const {\n      if (empty()) {\n\tos << \"(empty \" << Engine::name() << \")\";\n      }\n      else if (adept::internal::array_print_curly_brackets) {\n\tos << \"\\n\";\n\tfor (int i = 
0; i < dimension_; ++i) {\n\t  if (i == 0) {\n\t    os << \"{{\";\n\t  }\n\t  else {\n\t    os << \" {\";\n\t  }\n\t  for (int j = 0; j < dimension_; ++j) {\n\t    os << (*this)(i,j);\n\t    if (j < dimension_-1) { os << \", \"; }\n\t  }\n\t  os << \"}\";\n\t  if (i < dimension_-1) { \n\t    os << \",\\n\"; \n\t  }\n\t  else {\n\t    //\t    os << \"}\\n\"; \n\t    os << \"}\"; \n\t  }\n\t}\n      }\n      else {\n\tfor (int i = 0; i < dimension_; ++i) {\n\t  for (int j = 0; j < dimension_; ++j) {\n\t    os << (*this)(i,j);\n\t    if (j < dimension_-1) { os << \" \"; }\n\t  }\n\t  os << \"\\n\"; \n\t}\n      }\n      return os;\n    }\n    */\n\n    std::ostream& print(std::ostream& os) const {\n      const Array<rank,Type,IsActive> x(*this);\n      x.print(os);\n      return os;\n    }    \n\n    std::ostream& print_raw(std::ostream& os) const {\n      if (empty()) {\n\tos << \"(empty \" << Engine::name() << \")\\n\";\n      }\n      else {\n\tfor (Index i = 0; i < Engine::data_size(dimension_,offset_); ++i) {\n\t  os << \" \" << data_[i];\n\t}\n\tos << \"\\n\";\n      }\n      return os;\n    }\n\n    // Get pointers to the first and last data members in memory.  \n    void data_range(Type const * &data_begin, Type const * &data_end) const {\n      data_begin = data_;\n      data_end = data_ + Engine::data_size(dimension_, offset_) - 1;\n    }\n\n    // The Stack::independent(x) and Stack::dependent(y) functions add\n    // the gradient_index of objects x and y to std::vector<uIndex>\n    // objects in Stack. 
Since x and y may be scalars or arrays, this\n    // is best done by delegating to the Active or Array classes.\n    template <typename IndexType>\n    void push_gradient_indices(std::vector<IndexType>& vec) {\n      ADEPT_STATIC_ASSERT(IsActive,\n\t  CANNOT_PUSH_GRADIENT_INDICES_FOR_INACTIVE_SPECIAL_MATRIX); \n      Index j_start, j_end_plus_1, index, index_stride;\n      Index gradient_ind = gradient_index();\n      vec.reserve(vec.size() + Engine::data_size(dimension_, offset_));\n      for (Index i = 0; i < dimension_; ++i) {\n\tEngine::get_row_range(i, dimension_, offset_, \n\t\t\t      j_start, j_end_plus_1, index, index_stride);\n\tfor (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) {\n\t  vec.push_back(gradient_ind + index);\n\t}\n      }\n    }\n\n    // Return inactive array linked to original data\n    SpecialMatrix<Type, Engine, false> inactive_link() {\n      SpecialMatrix<Type, Engine, false> A;\n      A.data_ = data_;\n      A.storage_ = storage_;\n      A.dimension_ = dimension_;\n      A.offset_ = offset_;\n      if (storage_) storage_->add_link();\n      return A;\n    }\n\n\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 6. 
Member functions accessed by the Expression class\n    // -------------------------------------------------------------------\n\n    template <int MyArrayNum, int NArrays>\n    void set_location_(const ExpressionSize<2>& i, \n\t\t       ExpressionSize<NArrays>& index) const {\n      index[MyArrayNum] = Engine::index(i[0],i[1],offset_);\n      Engine::template set_extras<MyArrayNum>(i[0],offset_,index);\n    }\n    \n    template <int MyArrayNum, int NArrays>\n    Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n      return Engine::template value_at_location<MyArrayNum>(data_, loc);\n    }\n\n    Type& lvalue_at_location(const Index& loc) {\n      return data_[loc];\n    }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t  internal::ScratchVector<NScratch>& scratch) const {\n      return Engine::template value_at_location<MyArrayNum>(data_, loc);\n\n    }\n\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t       const internal::ScratchVector<NScratch>& scratch) const {\n      return Engine::template value_at_location<MyArrayNum>(data_, loc);\n    }\n\n    template <int MyArrayNum, int NArrays>\n    void advance_location_(ExpressionSize<NArrays>& loc) const {\n      loc[MyArrayNum] += Engine::template row_offset<MyArrayNum>(offset_, loc);\n    }\n\n    // If an expression leads to calc_gradient being called on an\n    // active object, we push the multiplier and the gradient index on\n    // to the operation stack (or 1.0 if no multiplier is specified\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n    void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch) const {\n      Engine::template push_rhs<MyArrayNum>(stack, static_cast<Type>(1.0), 
\n\t\t\t\t\t    gradient_index(), loc);\n    }\n    template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n    void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\tconst internal::ScratchVector<NScratch>& scratch,\n\t\t\tconst MyType& multiplier) const {\n      Engine::template push_rhs<MyArrayNum>(stack, multiplier, gradient_index(), loc);\n    }\n  \n\n\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 7. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n\n    // When assigning a scalar to a whole array, there may be\n    // advantage in specialist behaviour depending on the rank of the\n    // array. This is a generic one that copies the number but treats\n    // the present array as passive.\n    template <bool LocalIsActive, typename X>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_inactive_scalar(X x) {\n      Index j_start, j_end_plus_1, index, index_stride;\n      for (Index i = 0 ; i < dimension_; ++i) {\n\tEngine::get_row_range(i, dimension_, offset_, \n\t\t\t      j_start, j_end_plus_1, index, index_stride);\n\tfor (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) {\n\t  data_[index] = x;\n\t}\n      }\n    }\n\n    // An active array being assigned the value of an inactive scalar\n    template <bool LocalIsActive, typename X>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_inactive_scalar(X x) {\n      // If not recording we call the inactive version instead\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (! 
ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_inactive_scalar<false, X>(x);\n\treturn;\n      }\n#endif\n      Index j_start, j_end_plus_1, index, index_stride;\n      for (Index i = 0 ; i < dimension_; ++i) {\n\tEngine::get_row_range(i, dimension_, offset_, \n\t\t\t      j_start, j_end_plus_1, index, index_stride);\n\tADEPT_ACTIVE_STACK->push_lhs_range(gradient_index()+index, j_end_plus_1-j_start,\n\t\t\t\t\t   index_stride);\n\tfor (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) {\n\t  data_[index] = x;\n\t}\n      }\n    }\n\n\n    // When copying an expression to a whole array, there may be\n    // advantage in specialist behaviour depending on the rank of the\n    // array\n    template<bool LocalIsActive, bool EIsActive, class E>\n    typename internal::enable_if<!LocalIsActive,void>::type\n    assign_expression_(const E& rhs) {\n      ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY);\n      ExpressionSize<2> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      Index j_start, j_end_plus_1, index, index_stride;\n      for ( ; i[0] < dimension_; ++i[0]) {\n\tEngine::get_row_range(i[0], dimension_, offset_, \n\t\t\t      j_start, j_end_plus_1, index, index_stride);\n\ti[1] = j_start;\n\trhs.set_location(i, ind);\t\n\tfor (i[1] = j_start; i[1] < j_end_plus_1;\n\t     ++i[1], index += index_stride) {\n\t  data_[index] = rhs.next_value(ind);\n\t}\n      }\n    }\n\n    template<bool LocalIsActive, bool EIsActive, class E>\n    typename internal::enable_if<LocalIsActive,void>::type\n    assign_expression_(const E& rhs) {\n      // If recording has been paused then call the inactive version\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\tassign_expression_<false,false>(rhs);\n\treturn;\n      }\n#endif\n      ExpressionSize<2> i(0);\n      ExpressionSize<internal::expr_cast<E>::n_arrays> ind(0);\n      
ADEPT_ACTIVE_STACK->check_space(internal::expr_cast<E>::n_active * size());\n      Index j_start, j_end_plus_1, index, index_stride;\n      for ( ; i[0] < dimension_; ++i[0]) {\n\tEngine::get_row_range(i[0], dimension_, offset_, \n\t\t\t      j_start, j_end_plus_1, index, index_stride);\n\ti[1] = j_start;\n\trhs.set_location(i, ind);\t\n\tfor (i[1] = j_start; i[1] < j_end_plus_1; ++i[1], index += index_stride) {\n\t  data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, ind);\n\t  ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index);\n\t}\n      }\n    }\n\n\n    // -------------------------------------------------------------------\n    // SpecialMatrix: 8. Data\n    // -------------------------------------------------------------------\n  protected:\n    Type* data_;                      // Pointer to values\n    Storage<Type>* storage_;          // Pointer to Storage object\n    Index dimension_;                 // Size of each dimension\n    Index offset_;                    // Memory offset for\n\t\t\t\t      // slowest-varying dimension\n\n  }; // End of SpecialMatrix class\n\n\n  // -------------------------------------------------------------------\n  // Helper functions\n  // -------------------------------------------------------------------\n\n  // Print array on a stream\n  template <typename Type, class Engine, bool IsActive>\n  inline\n  std::ostream&\n  operator<<(std::ostream& os, const SpecialMatrix<Type,Engine,IsActive>& A) {\n    return A.print(os);\n  }\n\n  // Extract inactive part of array, working correctly depending on\n  // whether argument is active or inactive\n  template <typename Type, class Engine>\n  inline\n  SpecialMatrix<Type, Engine, false>&\n  value(SpecialMatrix<Type, Engine, false>& expr) {\n    return expr;\n  }\n  template <typename Type, class Engine>\n  inline\n  SpecialMatrix<Type, Engine, false>\n  value(SpecialMatrix<Type, Engine, true>& expr) {\n    return expr.inactive_link();\n  }\n\n  // 
Array::diag_matrix(), where Array is a 1D array, returns a\n  // DiagMatrix containing the data as the diagonal pointing to the\n  // original data, Can be used as an lvalue. Needs to be defined\n  // after DiagMatrix.\n  template <int Rank, typename Type, bool IsActive>\n  inline\n  SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>, IsActive>\n  Array<Rank,Type,IsActive>::diag_matrix() {\n    return SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>,\n      IsActive> (data_, storage_, dimensions_[0], offset_[0]-1);\n  }\n\n  template <typename Type, bool IsActive, Index J0, Index J1, Index J2,\n\t    Index J3, Index J4, Index J5, Index J6>\n  inline\n  SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>, IsActive>\n  FixedArray<Type,IsActive,J0,J1,J2,J3,J4,J5,J6>::diag_matrix() {\n    return SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>, \n      IsActive> (data_, 0, dimension_<0>::value, offset_<0>::value-1,\n\t\t internal::GradientIndex<IsActive>::get());\n  }\n\n} // End namespace adept\n\n\n\n\n#endif\n"
  },
  {
    "path": "include/adept/Stack.h",
    "content": "/* Stack.h -- Storage of automatic differentiation information\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015-2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The Stack class is where all the derivative information of an\n   algorithm, from which the Jacobian matrix can be constructed, as\n   well as tangent-linear and adjoint operations being carried out for\n   suitable input derivatives.  When a Stack object is created it puts\n   a pointer to itself in a global but thread-local variable that is\n   then accessed whenever an active expression is evaluated.\n\n*/\n\n#ifndef AdeptStack_H\n#define AdeptStack_H 1\n\n#include <cmath>\n#include <iostream>\n#include <typeinfo>\n#include <utility>\n#include <string>\n#include <vector>\n#include <list>\n#include <cstddef>\n#include <limits>\n\n#ifdef ADEPT_STACK_STORAGE_STL\n#include <valarray>\n#endif\n\n#include <adept/base.h>\n#include <adept/exception.h>\n#include <adept/StackStorageOrig.h>\n#include <adept/StackStorageOrigStl.h>\n#include <adept/traits.h>\n\nnamespace adept {\n\n  // ---------------------------------------------------------------------\n  // Access to Stack object via global pointer\n  // ---------------------------------------------------------------------\n\n  // Declare a thread-safe and a thread-unsafe global pointer to the\n  // current stack\n  class Stack;\n  extern ADEPT_THREAD_LOCAL Stack* _stack_current_thread;\n  extern Stack* _stack_current_thread_unsafe;\n\n  // Define ADEPT_ACTIVE_STACK to be the currently active version\n  // regardless of whether we are in thread safe or unsafe mode\n#ifdef ADEPT_STACK_THREAD_UNSAFE\n#define ADEPT_ACTIVE_STACK adept::_stack_current_thread_unsafe\n#else\n#define ADEPT_ACTIVE_STACK adept::_stack_current_thread\n#endif\n\n  // ---------------------------------------------------------------------\n  
// Helper classes\n  // ---------------------------------------------------------------------\n\n  // Structure holding a fixed-size array of objects (intended for\n  // double or float)\n  template<int Size, class Type>\n  struct Block {\n    Block() { zero(); }\n    const Type& operator[](uIndex i) const { return data[i]; }\n    Type& operator[](uIndex i) { return data[i]; }\n    void zero() { for (uIndex i = 0; i < Size; i++) data[i] = 0.0; }\n    Type data[Size] ADEPT_SSE2_ALIGNED;\n  };\n\n  // Structure for describing a gap in the current list of gradients\n  struct Gap {\n    Gap(uIndex value) : start(value), end(value) {}\n    Gap(uIndex start_, uIndex end_) : start(start_), end(end_) {}\n    uIndex start;\n    uIndex end;\n  };\n\n  // Forward declaration of Array, to enable Jacobian functions\n  template<int Rank, typename Type, bool IsActive>\n  class Array;\n\n  // ---------------------------------------------------------------------\n  // Definition of Stack class\n  // ---------------------------------------------------------------------\n\n  // \"Stack\" inherits from a class defining the storage of the stack\n  // information, which is controlled by preprocessor\n  // variables. Member functions not defined here are in Stack.cpp.\n  class Stack \n#ifdef ADEPT_STACK_STORAGE_STL\n    : public internal::StackStorageOrigStl\n#else\n    : public internal::StackStorageOrig\n#endif\n  {\n  public:\n    // -------------------------------------------------------------------\n    // Stack: 1. Static Definitions\n    // -------------------------------------------------------------------\n    typedef std::list<Gap> GapList;\n    typedef std::list<Gap>::iterator GapListIterator;\n\n    // -------------------------------------------------------------------\n    // Stack: 2. 
Constructor and destructor\n    // -------------------------------------------------------------------\n\n    // Only one constructor, which is normally called with no\n    // arguments, but if \"false\" is provided as the argument it will\n    // construct as normal but not attempt to make itself the active stack\n    Stack(bool activate_immediately = true) :\n#ifndef ADEPT_STACK_STORAGE_STL\n      gradient_(0),\n#endif\n      most_recent_gap_(gap_list_.end()),\n      i_gradient_(0), n_allocated_gradients_(0), max_gradient_(0),\n      n_gradients_registered_(0),\n      gradients_initialized_(false), \n#ifdef ADEPT_STACK_THREAD_UNSAFE\n      is_thread_unsafe_(true),\n#else\n      is_thread_unsafe_(false),\n#endif\n      is_recording_(true),\n      // Since the library might be compiled with OpenMP support and\n      // subsequent programs without, we need to tell the library via\n      // the following variable\n#ifdef _OPENMP\n      have_openmp_(true),\n#else\n      have_openmp_(false),\n#endif\n      openmp_manually_disabled_(false)\n    { \n      initialize(ADEPT_INITIAL_STACK_LENGTH);\n      new_recording();\n      if (activate_immediately) {\n\tactivate();\n      }\n    }\n  \n    // Destructor\n    ~Stack();\n\n    // -------------------------------------------------------------------\n    // Stack: 3. 
Public member functions\n    // -------------------------------------------------------------------\n\n    // This function is no longer available\n    void start(uIndex n = ADEPT_INITIAL_STACK_LENGTH) {\n      throw feature_not_available(\"The Stack::start() function has been removed since Adept version 1.0: see the documentation about how to use Stack::new_recording()\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n    }\n\n    // After a sequence of operation pushes, we may append these to\n    // the previous statement by calling this function.\n    // gradient_index is the index of the gradient on the LHS of the\n    // statement: if this does not match the LHS of the previous\n    // statement then this is an error and \"false\" will be returned. A\n    // \"true\" return value indicates success.\n    bool update_lhs(const uIndex& gradient_index) {\n      if (statement_[n_statements_-1].index != gradient_index) {\n\treturn false;\n      }\n      else {\n\tstatement_[n_statements_-1].end_plus_one = n_operations_;\n\treturn true;\n      }\n    }\n\n    // When an aReal object is created it is registered on the stack\n    // and keeps a copy of its location, which is returned from this\n    // function\n    uIndex register_gradient() {\n      uIndex return_val;\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (is_recording()) {\n#endif\n\tn_gradients_registered_++;\n\tif (gap_list_.empty()) {\n\t  // Add to end of gradient vector\n\t  i_gradient_++;\n\t  if (i_gradient_ > max_gradient_) {\n\t    max_gradient_ = i_gradient_;\n\t  }\n\t  return_val = i_gradient_-1;\n\t}\n\telse {\n\t  // Insert in a gap\n\t  Gap& first_gap = gap_list_.front();\n\t  return_val = first_gap.start;\n\t  first_gap.start++;\n\t  if (first_gap.start > first_gap.end) {\n\t    // Gap has closed: remove it from the list, after checking\n\t    // if it had been stored as the gap that had most recently\n\t    // grown\n\t    if (most_recent_gap_ == gap_list_.begin()) {\n\t      most_recent_gap_ = 
gap_list_.end();\n\t    }\n\t    gap_list_.pop_front();\n\t  }\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\treturn_val = 0;\n      }\n#endif\n      return return_val;\n    }\n\n    // Register n gradients and return the index of the first one\n    uIndex register_gradients(const uIndex& n)  {\n      uIndex return_val;\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (is_recording()) {\n#endif\n\treturn_val = do_register_gradients(n);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n      else {\n\treturn_val = 0;\n      }\n#endif\n      return return_val;\n    }\n\n\n    // When an aReal object is destroyed it is unregistered from the\n    // stack. If it is at the top of the stack then the stack pointer\n    // can be decremented so that the space can be used by another\n    // object. A gap can appear in the stack if an active object (or\n    // array of active objects) is returned from a function, so we\n    // need to keep track of a \"gap\" appearing in the stack. If the\n    // user uses new and delete without any regard for this \"last-in\n    // first-out\" preference then the number of gradients that are\n    // allocated in the reverse pass may be larger than needed.\n    void unregister_gradient(const uIndex& gradient_index) {\n      n_gradients_registered_--;\n      if (gradient_index+1 == i_gradient_) {\n        // Gradient to be unregistered is at the top of the stack\n        i_gradient_--;\n\tif (!gap_list_.empty()) {\n\t  Gap& last_gap = gap_list_.back();\n\t  if (i_gradient_ == last_gap.end+1) {\n\t    // We have unregistered the elements between the \"gap\" of\n\t    // unregistered element and the top of the stack, so can\n\t    // set the variables indicating the presence of the gap to\n\t    // zero\n\t    i_gradient_ = last_gap.start;\n\t    GapListIterator it = gap_list_.end();\n\t    it--;\n\t    if (most_recent_gap_ == it) {\n\t      most_recent_gap_ = gap_list_.end();\n\t    }\n\t    gap_list_.pop_back();\n\t  }\n\t}\n      }\n     
 else { // Gradient to be unregistered not at top of stack.\n\t// In the less common situation that the gradient is not at\n\t// the top of the stack, the task of unregistering is a bit\n\t// more involved, so we carry it out in a non-inline function\n\t// to avoid code bloat\n\tunregister_gradient_not_top(gradient_index);\n      }\n    }\n\n    // Unregister n gradients starting at gradient_index\n    void unregister_gradients(const uIndex& gradient_index,\n\t\t\t      const uIndex& n);\n\n\n  protected:\n    uIndex do_register_gradients(const uIndex& n);\n\n    // Unregister a gradient that is not at the top of the stack\n    void unregister_gradient_not_top(const uIndex& gradient_index);\n  public:\n\n    // Set the gradients in the list with indices between start and\n    // end_plus_one-1 to the values pointed to by \"gradient\"\n    template <typename MyReal>\n    typename internal::enable_if<internal::is_floating_point<MyReal>::value,\n\t\t       void>::type\n    set_gradients(uIndex start, uIndex end_plus_one,\n\t\t  const MyReal* gradient) {\n      // Need to initialize the gradient list if not already done\n      if (!gradients_are_initialized()) {\n\tinitialize_gradients();\n      }\n      if (end_plus_one > max_gradient_) {\n\tthrow gradient_out_of_range();\n      }\n      for (uIndex i = start, j = 0; i < end_plus_one; i++, j++) {\n\tgradient_[i] = gradient[j];\n      }\n    }\n    template <typename MyReal>\n    typename internal::enable_if<internal::is_floating_point<MyReal>::value,\n\t\t       void>::type\n    set_gradients(uIndex start, uIndex end_plus_one,\n\t\t  const MyReal* gradient, Index src_stride, Index target_stride) {\n      // Need to initialize the gradient list if not already done\n      if (!gradients_are_initialized()) {\n\tinitialize_gradients();\n      }\n      if (end_plus_one > max_gradient_) {\n\tthrow gradient_out_of_range();\n      }\n      for (uIndex i = start, j = 0; i < end_plus_one; i+=target_stride, j+=src_stride) 
{\n\tgradient_[i] = gradient[j];\n      }\n    }\n\n    // Get the gradients in the list with indices between start and\n    // end_plus_one-1 and put them in the location pointed to by\n    // \"gradient\"\n    template <typename MyReal>\n    typename internal::enable_if<internal::is_floating_point<MyReal>::value,\n\t\t       void>::type\n    get_gradients(uIndex start, uIndex end_plus_one,\n\t\t  MyReal* gradient) const {\n      if (!gradients_are_initialized()) {\n\tthrow gradients_not_initialized();\n      }\n      if (end_plus_one > max_gradient_) {\n\tthrow gradient_out_of_range();\n      }\n      for (uIndex i = start, j = 0; i < end_plus_one; i++, j++) {\n\tgradient[j] = gradient_[i];\n      }\n    }\n    template <typename MyReal>\n    typename internal::enable_if<internal::is_floating_point<MyReal>::value,\n\t\t       void>::type\n    get_gradients(uIndex start, uIndex end_plus_one,\n\t\t  MyReal* gradient, Index src_stride, Index target_stride) const {\n      if (!gradients_are_initialized()) {\n\tthrow gradients_not_initialized();\n      }\n      if (end_plus_one > max_gradient_) {\n\tthrow gradient_out_of_range();\n      }\n      for (uIndex i = start, j = 0; i < end_plus_one; i+=src_stride, j+=target_stride) {\n\tgradient[j] = gradient_[i];\n      }\n    }\n\n    // Run the tangent-linear algorithm on the gradient list; normally\n    // this call is preceded calls to set_gradient to load input\n    // gradients and followed by calls to get_gradient to extract\n    // gradients\n    void compute_tangent_linear();\n    void forward() { return compute_tangent_linear(); }\n\n    // Run the adjoint algorithm on the gradient list; normally this\n    // call is preceded calls to set_gradient to load input gradient\n    // and followed by calls to get_gradient to extract gradient\n    void compute_adjoint();\n    void reverse() { return compute_adjoint(); }\n\n    // Return the number of independent and dependent variables that\n    // have been identified\n  
  uIndex n_independent() const { return static_cast<uIndex>(independent_index_.size()); }\n    uIndex n_dependent()   const { return static_cast<uIndex>(dependent_index_.size()); }\n\n    // Compute the Jacobian matrix; note that jacobian_out must be\n    // allocated to be of size m*n, where m is the number of dependent\n    // variables and n is the number of independents. The independents\n    // and dependents must have already been identified with the\n    // functions \"independent\" and \"dependent\", otherwise this\n    // function will throw a\n    // \"dependents_or_independents_not_identified\" exception. The\n    // optional dep_offset and indep_offset specify the offsets in\n    // memory of the dependent and independent variables,\n    // respectively, where 0 indicates to use the size of the other\n    // dimension.  The default is dep_offset=1, i.e. the dependents\n    // vary contiguously in memory which is equivalent to the Jacobian\n    // being stored in column-major order.  
Unfortunately this is not\n    // the same as the convention for Adept arrays, but this part of\n    // the interface was designed in Adept 1 before arrays were added.\n    void jacobian(Real* jacobian_out,\n\t\t  Index dep_offset = 1,\n\t\t  Index indep_offset = 0) const {\n      // Call one of jacobian_forward and jacobian_reverse, whichever\n      // would be faster.\n      if (n_independent() <= n_dependent()) {\n\tjacobian_forward(jacobian_out, dep_offset, indep_offset);\n      }\n      else {\n\tjacobian_reverse(jacobian_out, dep_offset, indep_offset);\n      }\n    };\n\n    // Compute the Jacobian matrix, but explicitly specify whether\n    // this is done with repeated forward or reverse passes.\n    void jacobian_forward(Real* jacobian_out,\n\t\t\t  Index dep_offset = 1,\n\t\t\t  Index indep_offset = 0) const;\n    void jacobian_reverse(Real* jacobian_out,\n\t\t\t  Index dep_offset = 1,\n\t\t\t  Index indep_offset = 0) const;\n\n    // If the user included \"adept_arrays.h\" rather than \"adept.h\",\n    // then allow the Jacobian to be returned in the form of an Adept\n    // matrix.\n    void jacobian(Array<2,Real,false> jac) const;\n    void jacobian_forward(Array<2,Real,false> jac) const;\n    void jacobian_reverse(Array<2,Real,false> jac) const;\n    Array<2,Real,false> jacobian() const;\n    Array<2,Real,false> jacobian_forward() const;\n    Array<2,Real,false> jacobian_reverse() const;\n\n    // Return maximum number of OpenMP threads to be used in Jacobian\n    // calculation\n    int max_jacobian_threads() const;\n\n    // Set the maximum number of threads to be used in Jacobian\n    // calculations, if possible. A value of 1 indicates that OpenMP\n    // will not be used, while a value of 0 indicates that the number\n    // will match the number of available processors. Returns the\n    // maximum that will be used, which will be 1 if the Adept library\n    // was compiled without OpenMP support. 
Note that a value of 1\n    // will disable the use of OpenMP with Adept, so Adept will then\n    // use no OpenMP directives or function calls. Note that if in\n    // your program you use OpenMP with each thread performing\n    // automatic differentiaion with its own independent Adept stack,\n    // then typically only one OpenMP thread is available for each\n    // Jacobian calculation, regardless of whether you call this\n    // function.\n    int set_max_jacobian_threads(int n);\n\n    // In order to compute the jacobian we need to first declare which\n    // active variables are independent (x) and which are dependent\n    // (y). First, the following two functions declare an individual\n    // active variable and an array of active variables to be\n    // independent. Note that we use templates here because aReal has\n    // not been defined.\n    template <class A>\n    void independent(const A& x) {\n      //      independent_index_.push_back(x.gradient_index());\n      x.push_gradient_indices(independent_index_);\n    }\n    template <class A>\n    void independent(const A* x, uIndex n) {\n      for (uIndex i = 0; i < n; i++) {\n\t//\tindependent_index_.push_back(x[i].gradient_index());\n\tx[i].push_gradient_indices(independent_index_);\n      }\n    }\n\n    // Likewise, delcare the dependent variables\n    template <class A>\n    void dependent(const A& x) {\n      //      dependent_index_.push_back(x.gradient_index());\n      x.push_gradient_indices(dependent_index_);\n    }\n    template <class A>\n    void dependent(const A* x, uIndex n) {\n      for (uIndex i = 0; i < n; i++) {\n\t//\tdependent_index_.push_back(x[i].gradient_index());\n\tx[i].push_gradient_indices(dependent_index_);\n      }\n    }\n\n    // Print various bits of information about the Stack to the\n    // specified stream (or standard output if not specified). 
The\n    // same behaviour can be obtained by \"<<\"-ing the Stack to a\n    // stream.\n    void print_status(std::ostream& os = std::cout) const;\n\n    // Print each derivative statement to the specified stream (or\n    // standard output if not specified)\n    void print_statements(std::ostream& os = std::cout) const;\n\n    // Print the current gradient list to the specified stream (or\n    // standard output if not specified); returns true on success or\n    // false if no gradients have been initialized\n    bool print_gradients(std::ostream& os = std::cout) const;\n\n    // Print a list of the gaps in the gradient list\n    void print_gaps(std::ostream& os = std::cout) const;\n\n    // Clear the gradient list enabling a new adjoint or\n    // tangent-linear computation to be performed with the same\n    // recording\n    void clear_gradients() {\n      gradients_initialized_ = false;\n    }\n\n    // Clear the list of independent variables, in order that a\n    // different Jacobian can be computed from the same recording\n    void clear_independents() {\n      independent_index_.clear();\n    }\n\n    // Clear the list of dependent variables, in order that a\n    // different Jacobian can be computed from the same recording\n    void clear_dependents() {\n      dependent_index_.clear();\n    }\n\n    // Function now removed\n    void clear() {\n      throw feature_not_available(\"The Stack::clear() function has been removed since Adept version 1.0: see the documentation about how to use Stack::new_recording()\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n    }\n    // Function now removed\n    void clear_statements() {\n      throw feature_not_available(\"The Stack::clear_statements() function has been removed since Adept version 1.0: see the documentation about how to use Stack::new_recording()\"\n\t\t\t\t  ADEPT_EXCEPTION_LOCATION);\n    }\n\n    // Make this stack \"active\" by copying its \"this\" pointer to a\n    // global variable; this makes it the 
stack that aReal objects\n    // subsequently interact with when being created and participating\n    // in mathematical expressions\n    void activate();\n\n    // This stack will stop being the one that aReal objects refer\n    // to; this may be useful if the thread needs to use another stack\n    // object for the next algorithm\n    void deactivate() {\n      if (is_active()) {\n\tADEPT_ACTIVE_STACK = 0;\n      }\n    }\n\n    // Return true if the Stack is \"active\", false otherwise\n    bool is_active() const {\n      return (ADEPT_ACTIVE_STACK == this);\n    }\n\n    // Clear the contents of the various lists ready for a new\n    // recording\n    void new_recording() {\n      clear_stack(); // Defined in the storage class\n      clear_independents();\n      clear_dependents();\n      clear_gradients();\n\n      // i_gradient_ is the maximum index of all currently constructed\n      // aReal objects and max_gradient_ is the maximum index of all\n      // that were used in a recording.  Thus when deleting the\n      // recording we need to set max_gradient_ to i_gradient_ or a\n      // little more.\n      max_gradient_ = i_gradient_+1;\n      // Insert a null statement\n      //    std::cerr << \"Inserting a null statement; when is this needed?\\n\";\n      push_lhs(-1);\n    }\n\n    // Are gradients to be computed?  The default is \"true\", but if\n    // ADEPT_RECORDING_PAUSABLE is defined then this may\n    // be false\n    bool is_recording() const {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      return is_recording_;\n#else\n      return true;\n#endif\n    }\n\n    // Stop recording gradient information, enabling a piece of active\n    // code to be run without the stack information being stored. 
This\n    // only works if ADEPT_RECORDING_PAUSABLE has been defined.\n    bool pause_recording() {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      is_recording_ = false;\n      return true;\n#else\n      return false;\n#endif\n    }\n    // Continue recording gradient information after a previous\n    // pause_recording() call. This only works if\n    // ADEPT_RECORDING_PAUSABLE has been defined.\n    bool continue_recording() {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      is_recording_ = true;\n      return true;\n#else\n      return false;\n#endif\n    }\n\n    // For modular codes, some modules may have an existing Jacobian\n    // code and possibly be unsuitable for automatic differentiation\n    // using Adept (e.g. because they are written in Fortran).  In\n    // this case, we can use the following two functions to \"wrap\" the\n    // non-Adept code. These are actually normally called by functions\n    // of the same name in the Active, ActiveReference and\n    // ActiveConstReference classes.\n    void add_derivative_dependence(uIndex lhs_index, uIndex rhs_index,\n\t\t\t\t   Real multiplier) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for 1 entry\n\tADEPT_ACTIVE_STACK->check_space(1);\n#endif\n\tif (multiplier != 0.0) {\n\t  push_rhs(multiplier, rhs_index);\n\t}\n\tpush_lhs(lhs_index);\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    void append_derivative_dependence(uIndex lhs_index, uIndex rhs_index,\n\t\t\t\t      Real multiplier) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for 1 entry\n\tADEPT_ACTIVE_STACK->check_space(1);\n#endif\n\tif (multiplier != 0.0) {\n\t  push_rhs(multiplier, rhs_index);\n\t}\n\tif (!update_lhs(lhs_index)) {\n\t  throw wrong_gradient(\"Wrong gradient: 
append_derivative_dependence called on a different active number from the most recent add_derivative_dependence call\"\n\t\t\t       ADEPT_EXCEPTION_LOCATION);\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // To enable the automatic differentiation of matrix\n    // multiplication, this function performs a similar role to\n    // aReal::add_derivative_dependence.  We add a derivative\n    // expression of the form d[lhs_index] =\n    // sum(multiplier[i*multiplier_stride]*d[rhs_index+i*index_stride]),\n    // where the summation is from i = 0 to n-1. Multiple calls to\n    // this function may be carried out but must be followed by\n    // push_lhs(lhs_index) to specify the left-hand-side of the\n    // statement.\n    template <typename Type>\n    void push_derivative_dependence(uIndex rhs_index,\n\t\t\t\t    const Type* multiplier,\n\t\t\t\t    int n = 1,\n\t\t\t\t    int index_stride = 1,\n\t\t\t\t    int multiplier_stride = 1) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (is_recording()) {\n#endif\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\t// Check there is space in the operation stack for n entries\n\tcheck_space(n);\n#endif\n\tfor (int i = 0; i < n; i++, rhs_index += index_stride, \n\t       multiplier += multiplier_stride) {\n\t  push_rhs(*multiplier, rhs_index);\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n    }\n\n    // Have the gradients been initialized?\n    bool gradients_are_initialized() const { return gradients_initialized_; }\n\n    // Return the number of statements, operations, and how much\n    // memory has been allocated for each\n    uIndex n_statements() const { return n_statements_; }\n    uIndex n_allocated_statements() const { return n_allocated_statements_; }\n    uIndex n_operations() const { return n_operations_; }\n    uIndex n_allocated_operations() const { return n_allocated_operations_; }\n\n    // Return the size of the two dimensions of a Jacobian matrix\n    uIndex n_independents() const { 
return static_cast<uIndex>(independent_index_.size()); }\n    uIndex n_dependents() const { return static_cast<uIndex>(dependent_index_.size()); }\n\n    // Return the maximum number of gradients required to perform\n    // adjoint calculation\n    uIndex max_gradients() const { return max_gradient_; }\n\n    // Return the highest gradient index on the left-hand-side of any\n    // of the statements currently on the stack\n    uIndex max_gradient_index() const {\n      uIndex mg = 0;\n      for (int is = 0; is < n_statements_; ++is) {\n\tif (statement_[is].index > mg) {\n\t  mg = statement_[is].index;\n\t}\n      }\n      return mg;\n    }\n\n    // Return the index to the current gradient\n    uIndex i_gradient() const { return i_gradient_; }\n\n    // Return the number of gradients memory has been allocated for\n    uIndex n_allocated_gradients() const { return n_allocated_gradients_; }\n\n    // Return the number of bytes used\n    std::size_t memory() const {\n      std::size_t mem = n_statements()*sizeof(uIndex)*2\n\t+ n_operations()*(sizeof(Real)+sizeof(uIndex));\n      if (gradients_are_initialized()) {\n\tmem += max_gradients()*sizeof(Real);\n      }\n      return mem;\n    }\n\n    // Return the number of gradients currently registered\n    uIndex n_gradients_registered() const { return n_gradients_registered_; }\n\n    // Return the fraction of multipliers equal to the specified\n    // number (usually -1, 0 or 1)\n    Real fraction_multipliers_equal_to(Real val) {\n      uIndex sum = 0;\n      for (uIndex i = 0; i < n_operations_; i++) {\n\tif (multiplier_[i] == val) {\n\t  sum++;\n\t}\n      }\n      return static_cast<Real>(sum)/static_cast<Real>(n_operations_);\n    }\n\n\n    bool is_thread_unsafe() const { return is_thread_unsafe_; }\n\n    const GapList& gap_list() const { return gap_list_; }\n\n    // Memory to store statements and operations can be preallocated,\n    // offering modest performance advantage if you define\n    // 
ADEPT_MANUAL_MEMORY_ALLOCATION and know the maximum number of\n    // statements and operations you will need\n    void preallocate_statements(uIndex n) {\n      if (n_statements_+n+1 >= n_allocated_statements_) {\n\tgrow_statement_stack(n);\n      }\n    }\n    void preallocate_operations(uIndex n) {\n      if (n_allocated_operations_ < n_operations_+n+1) {\n\tgrow_operation_stack(n);\n      }      \n    }\n\n    // -------------------------------------------------------------------\n    // Stack: 4. Protected member functions\n    // -------------------------------------------------------------------\n  protected:\n    // Initialize the vector of gradients ready for the adjoint\n    // calculation\n    void initialize_gradients();\n\n    // Set to zero the gradients required by a Jacobian calculation\n    /*\n    void zero_gradient_multipass() {\n      for (std::size_t i = 0; i < gradient_multipass_.size(); i++) {\n\tgradient_multipass_[i].zero();\n      }\n    }\n    */\n\n    // OpenMP versions of the forward and reverse Jacobian functions,\n    // which are called from the jacobian_forward and jacobian_reverse\n    // if OpenMP is enabled\n    void jacobian_forward_openmp(Real* jacobian_out,\n\t\t  Index dep_offset, Index indep_offset) const;\n    void jacobian_reverse_openmp(Real* jacobian_out,\n\t\t  Index dep_offset, Index indep_offset) const;\n\n    // The core code for computing Jacobians, used in both OpenMP and\n    // non-OpenMP versions\n    void jacobian_forward_kernel(Real* __restrict gradient_multipass_b) const;\n    void jacobian_forward_kernel_packet(Real* __restrict gradient_multipass_b) const;\n    void jacobian_forward_kernel_extra(Real* __restrict gradient_multipass_b, uIndex) const;\n    void jacobian_reverse_kernel(Real* __restrict gradient_multipass_b) const;\n    void jacobian_reverse_kernel_packet(Real* __restrict gradient_multipass_b) const;\n    void jacobian_reverse_kernel_extra(Real* __restrict gradient_multipass_b, uIndex) 
const;\n\n    // -------------------------------------------------------------------\n    // Stack: 5. Data\n    // -------------------------------------------------------------------\n  protected:\n\n#ifdef ADEPT_STACK_STORAGE_STL\n    // Data are stored using standard template library containers\n    //    std::valarray<Real> gradient_;\n    std::vector<Real> gradient_;\n#else\n    // Data are stored as dynamically allocated arrays\n    Real* __restrict gradient_;\n#endif\n    // For Jacobians we process multiple rows/columns at once so need\n    // what is essentially a 2D array\n    //    std::vector<Block<ADEPT_MULTIPASS_SIZE,Real> > gradient_multipass_;\n    // uIndexs of the independent and dependent variables\n    std::vector<uIndex> independent_index_;\n    std::vector<uIndex> dependent_index_;\n    // Keep a record of gaps in the gradient array to ensure that gaps\n    // are filled\n    GapList gap_list_;\n    //    Gap* most_recent_gap_;\n    GapListIterator most_recent_gap_;\n\n    uIndex i_gradient_;             // Current number of gradients\n    uIndex n_allocated_gradients_;  // Number of allocated gradients\n    uIndex max_gradient_;           // Max number of gradients to store\n    uIndex n_gradients_registered_; // Number of gradients registered\n    bool gradients_initialized_;    // Have the gradients been\n\t\t\t\t    // initialized?\n    bool is_thread_unsafe_;\n    bool is_recording_;\n    bool have_openmp_;              // true if this header file\n\t\t\t\t    // compiled with -fopenmp\n    bool openmp_manually_disabled_; // true if user called\n\t\t\t\t    // set_max_jacobian_threads(1)\n  }; // End of Stack class\n\n\n  // -------------------------------------------------------------------\n  // Helper functions\n  // -------------------------------------------------------------------\n\n  // Sending a Stack object to a stream reports information about the\n  // stack\n  inline\n  std::ostream& operator<<(std::ostream& os, const 
adept::Stack& stack) {\n    stack.print_status(os);\n    return os;\n  }\n\n  // Memory to store statements and operations can be preallocated,\n  // offering modest performance advantage if you define\n  // ADEPT_MANUAL_MEMORY_ALLOCATION and know the maximum number of\n  // statements and operations you will need. This version is useful\n  // in functions that don't have visible access to the currently\n  // active Adept stack. \n  inline\n  void preallocate_statements(uIndex n) {\n    ADEPT_ACTIVE_STACK->preallocate_statements(n);\n  }\n  inline\n  void preallocate_operations(uIndex n) {\n    ADEPT_ACTIVE_STACK->preallocate_operations(n);\n  }\n\n  // Returns a pointer to the currently active stack (or 0 if there is none)\n  inline\n  Stack* active_stack() { return ADEPT_ACTIVE_STACK; }\n\n  // Return whether the active stack is stored in a global variable\n  // (thread unsafe) rather than a thread-local global variable\n  // (thread safe)\n#ifdef ADEPT_STACK_THREAD_UNSAFE\n  inline bool is_thread_unsafe() { return true; }\n#else\n  inline bool is_thread_unsafe() { return false; }\n#endif \n\n  // Subsequent code should use adept::active_stack rather than this\n  // preprocessor macro\n  //#undef ADEPT_ACTIVE_STACK\n\n} // End of namespace adept\n\n\n#endif\n"
  },
  {
    "path": "include/adept/StackStorage.h",
    "content": "/* StackStorage.h -- Storage of statement & operation stacks\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   The Stack class inherits from a class providing the storage (and\n   interface to the storage) for the derivative statements that are\n   accumulated during the execution of an algorithm.  The derivative\n   statements are held in two stacks described by Hogan (2014): the\n   \"statement stack\" and the \"operation stack\".\n\n   This file provides the stack storage engine: blocks of dynamically\n   allocated arrays.\n\n*/\n\n#ifndef AdeptStackStorage_H\n#define AdeptStackStorage_H 1\n\n#include <adept/base.h>\n#include <adept/exception.h>\n#include <adept/Statement.h>\n\nnamespace adept {\n  namespace internal {\n\n    // Helper classes\n\n\n    struct StatementBlock {\n      StatementBlock(uIndex n_) : n(0), n_allocated(n_) {\n\tdata = new Statement[n_];\n      }\n      ~StatementBlock() {\n\tdelete [] data;\n      }\n      // Data\n      Statement* data;\n      uIndex n;\n      const uIndex n_allocated;\n    };\n\n    struct OperationBlock {\n      StatementBlock(uIndex n_) : n(0), n_allocated(n_) {\n\tmultiplier = new Real[n_];\n\tindex      = new uIndex[n_];\n      }\n      ~StatementBlock() {\n\tdelete [] multiplier;\n\tdelete [] index;\n      }\n      // Data\n      Real* multiplier;\n      uIndex* index;\n      uIndex n;\n      uIndex n_allocated\n    };\n\n    std::vector<StackBlock> stack_block_;\n    struct StackBlock {\n      StatementBlock* statement_list;\n      OperationBlock* operation_list;\n      uIndex statement_start;\n      uIndex statement_end;\n    };\n\n    std::vector<StatementBlock> statement_data_;\n    std::vector<OperationBlock> operation_data_;\n\n\n\n    class StackStorage {\n    public:\n      // Constructor\n      StackStorage() : 
\n\tstatement_(0), multiplier_(0), index_(0),\n\tn_statements_(0), n_allocated_statements_(0),\n\tn_operations_(0), n_allocated_operations_(0) { }\n      \n      // Destructor\n      ~StackStorage();\n\n      // Push an operation (i.e. a multiplier-gradient pair) on to the\n      // stack.  We assume here that check_space() as been called before\n      // so there is enough space to hold these elements.\n      void push_rhs(const Real& multiplier, const uIndex& gradient_index) {\n#ifdef ADEPT_REMOVE_NULL_STATEMENTS\n\t// If multiplier==0 then the resulting statement would have no\n\t// effect so we can speed up the subsequent adjoint/jacobian\n\t// calculations (at the expense of making this critical part\n\t// of the code slower)\n\tif (multiplier != 0.0) {\n#endif\n\t  multiplier_[n_operations_] = multiplier;\n\t  index_[n_operations_++] = gradient_index;\n\t\n#ifdef ADEPT_TRACK_NON_FINITE_GRADIENTS\n\t  if (!std::isfinite(multiplier) || std::isinf(multiplier)) {\n\t    throw non_finite_gradient();\n\t  }\n#endif\n\t\n#ifdef ADEPT_REMOVE_NULL_STATEMENTS\n\t}\n#endif\n      }\n\n\n      // Push a statement on to the stack: this is done after a\n      // sequence of operation pushes; gradient_index is the index of\n      // the gradient on the LHS of the expression, while the\n      // \"end_plus_one\" element is simply the current length of the\n      // operation list\n      void push_lhs(const uIndex& gradient_index) {\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tif (n_statements_ >= n_allocated_statements_) {\n\t  grow_statement_stack();\n\t}\n#endif\n\tstatement_[n_statements_].index = gradient_index;\n\tstatement_[n_statements_++].end_plus_one = n_operations_;\n      }\n\n      // Push n left-hand-sides of differential expressions on to the\n      // stack with no corresponding right-hand-side, appropriate if\n      // an array of active variables contiguous in memory (or\n      // separated by a fixed stride) has been assigned to inactive\n      // numbers.\n  
    void push_lhs_range(const uIndex& first, const uIndex& n, \n\t\t\t  const uIndex& stride = 1) {\n\tuIndex last_plus_1 = first+n*stride;\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tif (n_statements_+n > n_allocated_statements_) {\n\t  grow_statement_stack(n);\n\t}\n#endif\n\tfor (uIndex i = first; i < last_plus_1; i += stride) {\n\t  statement_[n_statements_].index = i;\n\t  statement_[n_statements_++].end_plus_one = n_operations_;\n\t}\n      }\n\n      // Check whether the operation stack contains enough space for n\n      // new operations; if not, grow it\n      void check_space(const uIndex& n) {\n\tif (n_allocated_operations_ < n_operations_+n+1) {\n\t  grow_operation_stack(n);\n\t}\n      }\n      template<uIndex n>\n      void check_space_static() {\n\tcheck_space(n);\n      }\n\n    protected:\n      // Called by new_recording()\n      void clear_stack() { \n\t// Set the recording indices to zero\n\tn_operations_ = 0;\n\tn_statements_ = 0;\n      }\n\n      // This function is called by the constructor to initialize\n      // memory, which can be grown subsequently\n      void initialize(uIndex n) {\n\tmultiplier_ = new Real[n];\n\tindex_ = new uIndex[n];\n\tn_allocated_operations_ = n;\n\tstatement_ = new Statement[n];\n\tn_allocated_statements_ = n;\n      }\n\n      // Grow the capacity of the operation or statement stacks to\n      // hold a minimum of \"min\" elements. 
If min=0 then the stacks\n      // are doubled in size.\n      void grow_operation_stack(uIndex min = 0);\n      void grow_statement_stack(uIndex min = 0);\n\n    protected:\n      // Data are stored as dynamically allocated arrays\n\n      // The \"statement stack\" is held as a single array\n      Statement* __restrict statement_ ;\n      // The \"operation stack\" is held as two arrays\n      Real*      __restrict multiplier_;\n      uIndex*    __restrict index_;\n\n      uIndex n_statements_;           // Number of statements\n      uIndex n_allocated_statements_; // Space allocated for statements\n      uIndex n_operations_;           // Number of operations\n      uIndex n_allocated_operations_; // Space allocated for statements\n    };\n\n  } // End namespace internal\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/StackStorageOrig.h",
    "content": "/* StackStorageOrig.h -- Original method to store statement & operation stacks\n\n    Copyright (C) 2014-2015 University of Reading\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   The Stack class inherits from a class providing the storage (and\n   interface to the storage) for the derivative statements that are\n   accumulated during the execution of an algorithm.  The derivative\n   statements are held in two stacks described by Hogan (2014): the\n   \"statement stack\" and the \"operation stack\".\n\n   This file provides the original storage engine: dynamically\n   allocated arrays with the two stacks resulting from an entire\n   algorithm being contiguous in memory.  This is not ideal for very\n   large algorithms.\n\n*/\n\n#ifndef AdeptStackStorageOrig_H\n#define AdeptStackStorageOrig_H 1\n\n#include <adept/base.h>\n#include <adept/exception.h>\n#include <adept/Statement.h>\n\nnamespace adept {\n  namespace internal {\n\n    class StackStorageOrig {\n    public:\n      // Constructor\n      StackStorageOrig() : \n\tstatement_(0), multiplier_(0), index_(0),\n\tn_statements_(0), n_allocated_statements_(0),\n\tn_operations_(0), n_allocated_operations_(0) { }\n      \n      // Destructor\n      ~StackStorageOrig();\n\n      // Push an operation (i.e. a multiplier-gradient pair) on to the\n      // stack.  
We assume here that check_space() as been called before\n      // so there is enough space to hold these elements.\n      void push_rhs(const Real& multiplier, const uIndex& gradient_index) {\n#ifdef ADEPT_REMOVE_NULL_STATEMENTS\n\t// If multiplier==0 then the resulting statement would have no\n\t// effect so we can speed up the subsequent adjoint/jacobian\n\t// calculations (at the expense of making this critical part\n\t// of the code slower)\n\tif (multiplier != 0.0) {\n#endif\n\t  multiplier_[n_operations_] = multiplier;\n\t  index_[n_operations_++] = gradient_index;\n\t\n#ifdef ADEPT_TRACK_NON_FINITE_GRADIENTS\n\t  if (!std::isfinite(multiplier) || std::isinf(multiplier)) {\n\t    throw non_finite_gradient();\n\t  }\n#endif\n\t\n#ifdef ADEPT_REMOVE_NULL_STATEMENTS\n\t}\n#endif\n      }\n\n      // Push the gradient indices of a vectorized operation on to the\n      // stack.  We assume here that check_space() as been called\n      // before so there is enough space to hold these elements. 
The\n      // multipliers will be added later.\n      template <Index Num, Index Stride>\n      void push_rhs_indices(const uIndex& gradient_index) {\n\tfor (Index i = 0; i < Num; ++i) {\n\t  index_[n_operations_+i*Stride] = gradient_index+i;\n\t}\n\t++n_operations_;\n      }\n\n      // Push a statement on to the stack: this is done after a\n      // sequence of operation pushes; gradient_index is the index of\n      // the gradient on the LHS of the expression, while the\n      // \"end_plus_one\" element is simply the current length of the\n      // operation list\n      void push_lhs(const uIndex& gradient_index) {\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tif (n_statements_ >= n_allocated_statements_) {\n\t  grow_statement_stack();\n\t}\n#endif\n\tstatement_[n_statements_].index = gradient_index;\n\tstatement_[n_statements_++].end_plus_one = n_operations_;\n      }\n\n      // Push n left-hand-sides of differential expressions on to the\n      // stack with no corresponding right-hand-side, appropriate if\n      // an array of active variables contiguous in memory (or\n      // separated by a fixed stride) has been assigned to inactive\n      // numbers. 
Note that the second and third arguments must not be\n      // references, since they may be compile-time constants for\n      // FixedArray objects.\n      void push_lhs_range(const uIndex& first, uIndex n, uIndex stride = 1) {\n\tuIndex last_plus_1 = first+n*stride;\n#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION\n\tif (n_statements_+n > n_allocated_statements_) {\n\t  grow_statement_stack(n);\n\t}\n#endif\n\tfor (uIndex i = first; i < last_plus_1; i += stride) {\n\t  statement_[n_statements_].index = i;\n\t  statement_[n_statements_++].end_plus_one = n_operations_;\n\t}\n      }\n\n      // Check whether the operation stack contains enough space for n\n      // new operations; if not, grow it\n      void check_space(uIndex n) {\n\tif (n_allocated_operations_ < n_operations_+n+1) {\n\t  grow_operation_stack(n);\n\t}\n      }\n      template<uIndex n>\n      void check_space_static() {\n\tcheck_space(n);\n      }\n\n    protected:\n      // Called by new_recording()\n      void clear_stack() { \n\t// Set the recording indices to zero\n\tn_operations_ = 0;\n\tn_statements_ = 0;\n      }\n\n      // This function is called by the constructor to initialize\n      // memory, which can be grown subsequently\n      void initialize(uIndex n) {\n\tmultiplier_ = new Real[n];\n\tindex_ = new uIndex[n];\n\tn_allocated_operations_ = n;\n\tstatement_ = new Statement[n];\n\tn_allocated_statements_ = n;\n      }\n\n      // Grow the capacity of the operation or statement stacks to\n      // hold a minimum of \"min\" elements. 
If min=0 then the stacks\n      // are doubled in size.\n      void grow_operation_stack(uIndex min = 0);\n      void grow_statement_stack(uIndex min = 0);\n\n    protected:\n      // Data are stored as dynamically allocated arrays\n\n      // The \"statement stack\" is held as a single array\n      Statement* __restrict statement_ ;\n      // The \"operation stack\" is held as two arrays\n      Real*      __restrict multiplier_;\n      uIndex*    __restrict index_;\n\n      uIndex n_statements_;           // Number of statements\n      uIndex n_allocated_statements_; // Space allocated for statements\n      uIndex n_operations_;           // Number of operations\n      uIndex n_allocated_operations_; // Space allocated for statements\n    };\n\n  } // End namespace internal\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/StackStorageOrigStl.h",
    "content": "/* StackStorageOrigStl.h -- Original storage of stacks using STL containers\n\n    Copyright (C) 2014-2015 University of Reading\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   The Stack class inherits from a class providing the storage (and\n   interface to the storage) for the derivative statements that are\n   accumulated during the execution of an algorithm.  The derivative\n   statements are held in two stacks described by Hogan (2014): the\n   \"statement stack\" and the \"operation stack\".\n\n   This file provides one of the original storage engine, which used\n   std::vector to hold the two stacks. Note that these stacks are\n   contiguous in memory, which is not ideal for very large algorithms.\n\n*/\n\n#ifndef AdeptStackStorageOrigStl_H\n#define AdeptStackStorageOrigStl_H 1\n\n#include <adept/base.h>\n#include <adept/exception.h>\n#include <adept/Statement.h>\n\nnamespace adept {\n  namespace internal {\n\n    class StackStorageOrigStl {\n    public:\n      // Constructor\n      StackStorageOrigStl() :\n\tn_statements_(0), n_allocated_statements_(0),\n\tn_operations_(0), n_allocated_operations_(0) { }\n      \n      // Destructor (does nothing)\n      ~StackStorageOrigStl() { };\n\n      // Push an operation (i.e. a multiplier-gradient pair) on to the\n      // stack.  
We assume here that check_space() as been called before\n      // so there is enough space to hold these elements.\n      void push_rhs(const Real& multiplier, const uIndex& gradient_index) {\n#ifdef ADEPT_REMOVE_NULL_STATEMENTS\n\t// If multiplier==0 then the resulting statement would have no\n\t// effect so we can speed up the subsequent adjoint/jacobian\n\t// calculations (at the expense of making this critical part\n\t// of the code slower)\n\tif (multiplier != 0.0) {\n#endif\n\t  multiplier_.push_back(multiplier);\n\t  index_.push_back(gradient_index);\n\t  n_operations_++;\n\t\n#ifdef ADEPT_TRACK_NON_FINITE_GRADIENTS\n\t  if (!std::isfinite(multiplier) || std::isinf(multiplier)) {\n\t    throw non_finite_gradient();\n\t  }\n#endif\n\t\n#ifdef ADEPT_REMOVE_NULL_STATEMENTS\n\t}\n#endif\n      }\n\n\n      // Push a statement on to the stack: this is done after a\n      // sequence of operation pushes; gradient_index is the index of\n      // the gradient on the LHS of the expression, while the\n      // \"end_plus_one\" element is simply the current length of the\n      // operation list\n      void push_lhs(const uIndex& gradient_index) {\n\tstatement_.push_back(Statement(gradient_index, n_operations_));\n\tn_statements_++;\n      }\n\n      // Push n left-hand-sides of differential expressions on to the\n      // stack with no corresponding right-hand-side, appropriate if\n      // an array of active variables contiguous in memory (or\n      // separated by a fixed stride) has been assigned to inactive\n      // numbers.\n      void push_lhs_range(const uIndex& first, const uIndex& n, \n\t\t\t  const uIndex& stride = 1) {\n\tuIndex last_plus_1 = first+n*stride;\n\tfor (uIndex i = first; i < last_plus_1; i += stride) {\n\t  statement_.push_back(Statement(i, n_operations_));\n\t}\n\tn_statements_ += n;\n      }\n\n      // Check whether the operation stack contains enough space for n\n      // new operations; for STL containers this does nothing\n      void 
check_space(const uIndex& n) { }\n      template<uIndex n> void check_space_static() { }\n\n    protected:\n      // Called by new_recording()\n      void clear_stack() { \n\t// If we use STL containers then the clear() function sets their\n\t// size to zero but leaves the memory allocated\n\tstatement_.clear();\n\tmultiplier_.clear();\n\tindex_.clear();\n\t// Set the recording indices to zero\n\tn_operations_ = 0;\n\tn_statements_ = 0;\n      }\n\n      // This function is called by the constructor to initialize\n      // memory, which can be grown subsequently\n      void initialize(uIndex n) {\n\tstatement_.reserve(n);\n\tmultiplier_.reserve(n);\n\tindex_.reserve(n);\n      }\n\n      // Grow the capacity of the operation or statement stacks to\n      // hold a minimum of \"min\" elements. If min=0 then the stacks\n      // are doubled in size.\n      void grow_operation_stack(uIndex min = 0);\n      void grow_statement_stack(uIndex min = 0);\n\n    protected:\n      // Data are stored using standard template library containers\n\n      // The \"statement stack\" is held as a single array\n      std::vector<Statement> statement_;\n      // The \"operation stack\" is held as two arrays\n      std::vector<Real> multiplier_;\n      std::vector<uIndex> index_;\n\n      uIndex n_statements_;           // Number of statements\n      uIndex n_allocated_statements_; // Space allocated for statements\n      uIndex n_operations_;           // Number of operations\n      uIndex n_allocated_operations_; // Space allocated for statements\n    };\n\n  } // End namespace internal\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/Statement.h",
    "content": "/* Statement.h -- Original method to store statement & operation stacks\n\n    Copyright (C) 2012-2014 University of Reading\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptStatement_H\n#define AdeptStatement_H 1\n\n#include <adept/base.h>\n\nnamespace adept {\n  namespace internal {\n\n    // Structure describing the LHS of a derivative expression.  For dx\n    // = z dy + y dz, \"index\" would be the location of dx in the\n    // gradient list, and \"end_plus_one\" would be one plus the location\n    // of the final operation (multiplier-derivative pair) on the RHS,\n    // in this case y dz.\n    struct Statement {\n      Statement() { }\n      Statement(uIndex index_, uIndex end_plus_one_)\n\t: index(index_), end_plus_one(end_plus_one_) { }\n      uIndex index;\n      uIndex end_plus_one;\n    };\n \n  }\n}\n\n#endif\n"
  },
  {
    "path": "include/adept/Storage.h",
    "content": "/* Storage.h -- store array of active or inactive data\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   The Storage class manages the data underlying array objects, and\n   uses a model of reference counting so that multiple objects can\n   refer to the same data.  This enables arrays that are actually\n   subsets of another array to be treated as normal array objects.\n\n*/\n\n#ifndef AdeptStorage_H\n#define AdeptStorage_H 1\n\n#include <string>\n#include <sstream>\n#include <limits>\n#include <complex>\n\n#include <adept/exception.h>\n#include <adept/base.h>\n#include <adept/Stack.h>\n#include <adept/Packet.h>\n#include <adept/traits.h>\n\n#ifdef ADEPT_STORAGE_THREAD_SAFE\n#include <atomic>\n#endif\n\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Global variables\n  // -------------------------------------------------------------------\n  namespace internal {\n    // To check for memory leaks, we keep a running total of the number\n    // of Storage objects that are created and destroyed\n    extern Index n_storage_objects_created_;\n    extern Index n_storage_objects_deleted_;\n  }\n\n  // -------------------------------------------------------------------\n  // Definition of Storage class\n  // -------------------------------------------------------------------\n  template <typename Type>\n  class Storage {\n  public:\n    // -------------------------------------------------------------------\n    // Storage: 1. 
Constructors and destructor\n    // -------------------------------------------------------------------\n\n    // The only way to construct this object is by passing it an\n    // integer indicating the size, and optionally for active objects,\n    // an integer representing the index to the gradients stored in\n    // the stack.\n    Storage(Index n, bool IsActive = false)\n      : n_(n), n_links_(1), gradient_index_(-1) {\n      data_ = internal::alloc_aligned<Type>(n);\n#ifdef ADEPT_INIT_REAL\n      initialize<Type>();\n#endif\n      internal::n_storage_objects_created_++; \n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n      if (IsActive) {\n\tgradient_index_ = ADEPT_ACTIVE_STACK->register_gradients(n);\n      }\n#endif\n    }\n    \n  protected:\n    // Only allow the class to destroy itself by putting in\n    // \"protected\".  FIX - would be better to start valid\n    // gradient_index at 1, so 0 is reserved for invalid values.\n    ~Storage() {\n      internal::free_aligned(data_);\n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (ADEPT_ACTIVE_STACK->is_recording()) {\n#endif\n\tif (gradient_index_ >= 0) {\n\t  ADEPT_ACTIVE_STACK->unregister_gradients(gradient_index_, n_);\n\t}\n#ifdef ADEPT_RECORDING_PAUSABLE\n      }\n#endif\n#endif\n      internal::n_storage_objects_deleted_++; \n    }\n\n    // Null initialization, copy and assignment methods that are\n    // \"protected\" to prevent them being used\n    Storage() { }\n    Storage(Storage& storage) { };\n    void operator=(Storage& storage) { };\n\n#ifdef ADEPT_INIT_REAL\n\n    // Initialize to zero, NaN or whatever for debugging\n    template <typename T>\n    typename internal::enable_if<internal::is_floating_point<T>::value, void>::type\n    initialize() {\n      for (int i = 0; i < n_; ++i) {\n\tdata_[i] = ADEPT_INIT_REAL;\n      }\n    }\n    template <typename T>\n    typename internal::enable_if<internal::is_complex<T>::value, void>::type\n    initialize() {\n  
    for (int i = 0; i < n_; ++i) {\n#ifdef ADEPT_INIT_REAL_SNAN\n        data_[i] = std::complex<typename Type::value_type>(\n          std::numeric_limits<typename Type::value_type>::signaling_NaN(),\n\t  std::numeric_limits<typename Type::value_type>::signaling_NaN());\n#else\n\tdata_[i] = std::complex<typename Type::value_type>(ADEPT_INIT_REAL, ADEPT_INIT_REAL);\n#endif\n      }\n    }\n\n    // Dummy initialize for non-floats\n    template <typename T>\n    typename internal::enable_if<!internal::is_floating_point<T>::value\n\t\t\t\t && !internal::is_complex<T>::value, void>::type\n    initialize() { }\n\n#endif\n\n\n    // -------------------------------------------------------------------\n    // Storage: 2. Public member functions\n    // -------------------------------------------------------------------  \n  public:\n    // Add link to an existing storage object\n    void add_link()\n    { n_links_++; } \n    \n    // Remove link as follows; this is only safe in a multi-threaded\n    // environment if ADEPT_STORAGE_THREAD_SAFE is defined, making\n    // n_links_ atomic\n    void remove_link() {\n      if (n_links_ == 0) {\n\tthrow invalid_operation(\"Attempt to remove more links to a storage object than set\"\n\t\t\t\tADEPT_EXCEPTION_LOCATION);\n      }\n      else if (--n_links_ == 0) {\n\tdelete this;\n      }\n    }\n\n    // Return the number of elements allocated\n    Index n_allocated() const\n    { return n_; }\n\n    // Return the number of links to an object\n    int n_links() const\n    { return n_links_; }\n\n    Index gradient_index() const\n    { return gradient_index_; }\n\n    // Return pointer to the start of the data\n    Type*\n    data()\n    { return data_; }\n    const Type*\n    data() const\n    { return data_; }\n\n    // Return a string of information\n    std::string\n    info_string() const {\n      std::stringstream x;\n      x << n_ << \" \" << sizeof(Type) << \"-byte elements allocated with \"\n\t<< n_links_ << \" links\";\n   
   return x.str();\n    }\n\n    // -------------------------------------------------------------------\n    // Storage: 3. Data\n    // -------------------------------------------------------------------  \n  private:\n    // Pointer to the start of the data\n    Type* data_;\n    // Number of elements allocated\n    Index n_;\n    // Number of links to the storage object allowing for arrays and\n    // array slices to point to the same data. If this falls to zero\n    // the Storage object will destruct itself\n#ifdef ADEPT_STORAGE_THREAD_SAFE\n    // If multiple threads are to simultaneously read subsets of this\n    // array then accesses to the reference counter must be made\n    // atomic\n    std::atomic<int> n_links_;\n#else\n    int n_links_;\n#endif\n    // For active variables, this s the gradient index of the first\n    // element.  It would be better to only store this if Type is\n    // floating point.\n    Index gradient_index_;\n\n  }; // End of Storage class\n  \n\n  // -------------------------------------------------------------------\n  // Helper functions\n  // -------------------------------------------------------------------\n  inline Index n_storage_objects()\n  { return internal::n_storage_objects_created_\n      - internal::n_storage_objects_deleted_; }\n\n  inline Index n_storage_objects_created()\n  { return internal::n_storage_objects_created_; }\n  \n  inline Index n_storage_objects_deleted()\n  { return internal::n_storage_objects_deleted_; }\n  \n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/UnaryOperation.h",
    "content": "/* UnaryOperation.h -- Unary operations on Adept expressions\n\n    Copyright (C) 2014-2020 European Centre for Medium-Range Weather Forecasts\n\n    Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptUnaryOperation_H\n#define AdeptUnaryOperation_H\n\n#include <adept/Expression.h>\n\n#include <adept/ArrayWrapper.h>\n\nnamespace adept {\n\n  namespace internal {\n\n    // ---------------------------------------------------------------------\n    // SECTION 3.1: Unary operations: define UnaryOperation type\n    // ---------------------------------------------------------------------\n\n    // Unary operations derive from this class, where Op is a policy\n    // class defining how to implement the operation, and R is the\n    // type of the argument of the operation\n    template <typename Type, template<class> class Op, class R>\n    struct UnaryOperation\n      : public Expression<Type, UnaryOperation<Type, Op, R> >,\n\tprotected Op<Type> {\n      \n      static const int  rank       = R::rank;\n      static const bool is_active  = R::is_active && !is_same<Type,bool>::value;\n      static const int  n_active   = R::n_active;\n      // FIX! 
Only store if active and if needed\n      static const int  n_scratch  = 1 + R::n_scratch;\n      static const int  n_arrays   = R::n_arrays;\n      // Will need to modify this for sqrt:\n      static const bool is_vectorizable\n\t= Op<Type>::is_vectorized && R::is_vectorizable;\n\n      using Op<Type>::operation;\n      using Op<Type>::operation_string;\n      using Op<Type>::derivative;\n      \n      //const R& arg;\n      typename nested_expression<R>::type arg;\n\n      UnaryOperation(const Expression<Type, R>& arg_)\n\t: arg(arg_.cast()) { }\n      \n      template <int Rank>\n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn arg.get_dimensions(dim);\n      }\n\n      std::string expression_string_() const {\n\tstd::string str;\n\tstr = operation_string();\n\tstr += \"(\" + arg.expression_string() + \")\";\n\treturn str;\n      }\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn arg.is_aliased(mem1, mem2);\n      }\n      bool all_arrays_contiguous_() const {\n\treturn arg.all_arrays_contiguous_();\n      }\n       bool is_aligned_() const {\n\treturn arg.is_aligned_();\n      }\n      template <int n>\n      int alignment_offset_() const { return arg.template alignment_offset_<n>(); }\n\n      template <int Rank>\n      Type value_with_len_(Index i, Index len) const {\n\treturn operation(arg.value_with_len(i, len));\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\targ.template advance_location_<MyArrayNum>(loc);\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(arg.template value_at_location_<MyArrayNum>(loc));\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(arg.template packet_at_location_<MyArrayNum>(loc));\n   
   }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] \n\t  = operation(arg.template value_at_location_store_<MyArrayNum,MyScratchNum+1>(loc, scratch));\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <bool IsAligned,\tint MyArrayNum, typename PacketType,\n\tint NArrays>\n      PacketType values_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(arg.template values_at_location_<IsAligned,MyArrayNum,PacketType>(loc));\n      }\n\n      template <bool UseStored, bool IsAligned,\tint MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<!UseStored,PacketType>::type\n      values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\tScratchVector<NScratch,PacketType>& scratch) const {\n\treturn scratch[MyScratchNum]\n\t  = operation(arg.template values_at_location_store_<UseStored,IsAligned,\n\t\t      MyArrayNum,MyScratchNum+1>(loc, scratch));\n      }\n      template <bool UseStored, bool IsAligned,\tint MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      typename enable_if<UseStored,PacketType>::type\n      values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\tScratchVector<NScratch,PacketType>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\targ.template calc_gradient_<MyArrayNum, 
MyScratchNum+1>(stack, loc, scratch,\n\t\tderivative(arg.template value_stored_<MyArrayNum,MyScratchNum+1>(loc, scratch),\n\t\t\t   scratch[MyScratchNum]));\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch,\n\t\ttypename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n\targ.template calc_gradient_<MyArrayNum, MyScratchNum+1>(stack, loc, scratch,\n\t\tmultiplier*derivative(arg.template value_stored_<MyArrayNum,MyScratchNum+1>(loc, scratch), \n\t\t\t\t      scratch[MyScratchNum]));\n      }\n\n      template <bool IsAligned, int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t\tint NArrays, int NScratch, int NActive>\n      void calc_gradient_packet_(Stack& stack, \n\t\t\t\t const ExpressionSize<NArrays>& loc,\n\t\t\t\t const ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t\t ScratchVector<NActive,Packet<Real> >& gradients) const {\n\targ.template calc_gradient_packet_<IsAligned,MyArrayNum,MyScratchNum+1,\n\t\t\t\t\t   MyActiveNum>(stack, loc, scratch, gradients,\n\t\tderivative(arg.template values_at_location_store_<true,IsAligned,MyArrayNum,MyScratchNum+1,\n\t\t\t   MyActiveNum>(loc, scratch), scratch[MyScratchNum]));\n      }\n\n      template <bool IsAligned, int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t\tint NArrays, int NScratch, int NActive, typename MyType>\n      void calc_gradient_packet_(Stack& stack, \n\t\t\t\t const ExpressionSize<NArrays>& loc,\n\t\t\t\t const ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t\t ScratchVector<NActive,Packet<Real> >& gradients,\n\t\t\t\t const MyType& multiplier) const {\n\targ.template calc_gradient_packet_<IsAligned,MyArrayNum,MyScratchNum+1,\n\t\t\t\t\t   MyActiveNum>(stack, loc, scratch, gradients,\n\t\tmultiplier*derivative(arg.template values_at_location_store_<true,IsAligned,MyArrayNum,MyScratchNum+1,\n\t\t\t\t      
MyActiveNum>(loc, scratch), scratch[MyScratchNum]));\n      }\n\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\targ.template set_location_<MyArrayNum>(i, index);\n      }\n\n    }; // End UnaryOperation type\n  \n  } // End namespace internal\n\n  // ---------------------------------------------------------------------\n  // SECTION 3.2: Unary operations: define specific operations\n  // ---------------------------------------------------------------------\n\n  // We may place the overloaded mathematical functions in the global\n  // namespace provided that a using declaration enables the std::\n  // version of the function to be located\n#define ADEPT_DEF_UNARY_FUNC(NAME, FUNC, RAWFUNC, STRING, DERIVATIVE,\t\\\n\t\t\t     ISVEC)\t\t\t\t\t\\\n  namespace internal {\t\t\t\t\t\t\t\\\n    template <typename Type>\t\t\t\t\t\t\\\n    struct NAME  {\t\t\t\t\t\t\t\\\n      static const bool is_operator = false;\t\t\t\t\\\n      static const bool is_vectorized = ISVEC;\t\t\t\t\\\n      const char* operation_string() const { return STRING; }\t\t\\\n      template <typename T>\t\t\t\t\t\t\\\n      T operation(const T& val) const {\t\t\t\t\t\\\n\tusing RAWFUNC;\t\t\t\t\t\t\t\\\n\treturn FUNC(val);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      Type derivative(const Type& val, const Type& result) const {\t\\\n\tusing std::sin;\t\t\t\t\t\t\t\\\n\tusing std::cos;\t\t\t\t\t\t\t\\\n\tusing std::sqrt;\t\t\t\t\t\t\\\n\tusing std::cosh;\t\t\t\t\t\t\\\n\tusing std::sinh;\t\t\t\t\t\t\\\n\tusing std::exp;\t\t\t\t\t\t\t\\\n\treturn DERIVATIVE;\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      Type fast_sqr(Type val) const { return val*val; }\t\t\t\\\n    };\t\t\t\t\t\t\t\t\t\\\n  } /* End namespace internal */\t\t\t\t\t\\\n  template <class Type, class R>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  adept::internal::UnaryOperation<Type, adept::internal::NAME, R>\t\\\n  
FUNC(const adept::Expression<Type, R>& r)\t{\t\t\t\\\n    return adept::internal::UnaryOperation<Type,\t\t\t\\\n\t\t\t\t   adept::internal::NAME, R>(r.cast()); \\\n  }\n\n  // Functions y(x) whose derivative depends on the argument of the\n  // function, i.e. dy(x)/dx = f(x)\n  ADEPT_DEF_UNARY_FUNC(Log,   log,   std::log,   \"log\",   1.0/val, false)\n  ADEPT_DEF_UNARY_FUNC(Log10, log10, std::log10, \"log10\", 0.43429448190325182765/val, false)\n  ADEPT_DEF_UNARY_FUNC(Sin,   sin,   std::sin,   \"sin\",   cos(val), false)\n  ADEPT_DEF_UNARY_FUNC(Cos,   cos,   std::cos,   \"cos\",   -sin(val), false)\n  ADEPT_DEF_UNARY_FUNC(Tan,   tan,   std::tan,   \"tan\",   1.0/fast_sqr(cos(val)), false)\n  ADEPT_DEF_UNARY_FUNC(Asin,  asin,  std::asin,  \"asin\",  1.0/sqrt(1.0-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Acos,  acos,  std::acos,  \"acos\",  -1.0/sqrt(1.0-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Atan,  atan,  std::atan,  \"atan\",  1.0/(1.0+val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Sinh,  sinh,  std::sinh,  \"sinh\",  cosh(val), false)\n  ADEPT_DEF_UNARY_FUNC(Cosh,  cosh,  std::cosh,  \"cosh\",  sinh(val), false)\n  ADEPT_DEF_UNARY_FUNC(Abs,   abs,   std::abs,   \"abs\",   ((val>0.0)-(val<0.0)), false)\n  ADEPT_DEF_UNARY_FUNC(Fabs,  fabs,  std::fabs,  \"fabs\",  ((val>0.0)-(val<0.0)), false)\n\n  // Functions y(x) whose derivative depends on the result of the\n  // function, i.e. 
dy(x)/dx = f(y)\n  ADEPT_DEF_UNARY_FUNC(Sqrt,  sqrt,  std::sqrt,  \"sqrt\",  0.5/result, true)\n  ADEPT_DEF_UNARY_FUNC(Tanh,  tanh,  std::tanh,  \"tanh\",  1.0 - result*result, false)\n\n  // Adept's vectorizable exponential function\n  ADEPT_DEF_UNARY_FUNC(Fastexp, fastexp, adept::fastexp, \"fastexp\", result, true)\n#ifdef ADEPT_FAST_EXPONENTIAL\n  ADEPT_DEF_UNARY_FUNC(Exp,   exp,   adept::functions::exp, \"fastexp\", result, true)\n#else\n  ADEPT_DEF_UNARY_FUNC(Exp,   exp,   std::exp,   \"exp\",   result, false)\n#endif\n\n  // Functions with zero derivative\n  ADEPT_DEF_UNARY_FUNC(Ceil,  ceil,  std::ceil,  \"ceil\",  0.0, false)\n  ADEPT_DEF_UNARY_FUNC(Floor, floor, std::floor, \"floor\", 0.0, false)\n  \n  // Functions defined in the std namespace in C++11 but only in the\n  // global namespace before that\n#ifdef ADEPT_CXX11_FEATURES\n  ADEPT_DEF_UNARY_FUNC(Log2,  log2,  std::log2,  \"log2\",  1.44269504088896340737/val, false)\n  ADEPT_DEF_UNARY_FUNC(Expm1, expm1, std::expm1, \"expm1\", exp(val), false)\n  ADEPT_DEF_UNARY_FUNC(Exp2,  exp2,  std::exp2,  \"exp2\",  0.6931471805599453094172321214581766*result, false)\n  ADEPT_DEF_UNARY_FUNC(Log1p, log1p, std::log1p, \"log1p\", 1.0/(1.0+val), false)\n  ADEPT_DEF_UNARY_FUNC(Asinh, asinh, std::asinh, \"asinh\", 1.0/sqrt(val*val+1.0), false)\n  ADEPT_DEF_UNARY_FUNC(Acosh, acosh, std::acosh, \"acosh\", 1.0/sqrt(val*val-1.0), false)\n  ADEPT_DEF_UNARY_FUNC(Atanh, atanh, std::atanh, \"atanh\", 1.0/(1.0-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Erf,   erf,   std::erf,   \"erf\",   1.12837916709551*exp(-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Erfc,  erfc,  std::erfc,  \"erfc\",  -1.12837916709551*exp(-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Cbrt,  cbrt,  std::cbrt,  \"cbrt\",  (1.0/3.0)/(result*result), false)\n  ADEPT_DEF_UNARY_FUNC(Round, round, std::round, \"round\", 0.0, false)\n  ADEPT_DEF_UNARY_FUNC(Trunc, trunc, std::trunc, \"trunc\", 0.0, false)\n  ADEPT_DEF_UNARY_FUNC(Rint,  rint,  std::rint,  \"rint\",  0.0, 
false)\n  ADEPT_DEF_UNARY_FUNC(Nearbyint,nearbyint,std::nearbyint,\"nearbyint\",0.0, false)\n#else\n  ADEPT_DEF_UNARY_FUNC(Log2,  log2,  ::log2,  \"log2\",  1.44269504088896340737/val, false)\n  ADEPT_DEF_UNARY_FUNC(Expm1, expm1, ::expm1, \"expm1\", exp(val), false)\n  ADEPT_DEF_UNARY_FUNC(Exp2,  exp2,  ::exp2,  \"exp2\",  0.6931471805599453094172321214581766*result, false)\n  ADEPT_DEF_UNARY_FUNC(Log1p, log1p, ::log1p, \"log1p\", 1.0/(1.0+val), false)\n  ADEPT_DEF_UNARY_FUNC(Asinh, asinh, ::asinh, \"asinh\", 1.0/sqrt(val*val+1.0), false)\n  ADEPT_DEF_UNARY_FUNC(Acosh, acosh, ::acosh, \"acosh\", 1.0/sqrt(val*val-1.0), false)\n  ADEPT_DEF_UNARY_FUNC(Atanh, atanh, ::atanh, \"atanh\", 1.0/(1.0-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Erf,   erf,   ::erf,   \"erf\",   1.12837916709551*exp(-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Erfc,  erfc,  ::erfc,  \"erfc\",  -1.12837916709551*exp(-val*val), false)\n  ADEPT_DEF_UNARY_FUNC(Cbrt,  cbrt,  ::cbrt,  \"cbrt\",  (1.0/3.0)/(result*result), false)\n  ADEPT_DEF_UNARY_FUNC(Round, round, ::round, \"round\", 0.0, false)\n  ADEPT_DEF_UNARY_FUNC(Trunc, trunc, ::trunc, \"trunc\", 0.0, false)\n  ADEPT_DEF_UNARY_FUNC(Rint,  rint,  ::rint,  \"rint\",  0.0, false)\n  ADEPT_DEF_UNARY_FUNC(Nearbyint,nearbyint,::nearbyint,\"nearbyint\",0.0, false)\n#endif\n\n  //#undef ADEPT_DEF_UNARY_FUNC\n\n#define ADEPT_DEF_UNARY_OP(NAME, FUNC, RAWFUNC, STRING, DERIVATIVE,\t\\\n\t\t\t   ISVEC)\t\t\t\t\t\\\n  namespace internal {\t\t\t\t\t\t\t\\\n    template <typename Type>\t\t\t\t\t\t\\\n    struct NAME  {\t\t\t\t\t\t\t\\\n      static const bool is_operator = false;\t\t\t\t\\\n      static const bool is_vectorized = ISVEC;\t\t\t\t\\\n      const char* operation_string() const { return STRING; }\t\t\\\n      template <typename T>\t\t\t\t\t\t\\\n      T operation(const T& val) const {\t\t\t\t\t\\\n\treturn RAWFUNC(val);\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      Type derivative(const Type& val, const Type& result) const {\t\\\n\treturn 
DERIVATIVE;\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      Type fast_sqr(Type val) { return val*val; }\t\t\t\\\n    };\t\t\t\t\t\t\t\t\t\\\n  } /* End namespace internal */\t\t\t\t\t\\\n  template <class Type, class R>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  adept::internal::UnaryOperation<Type, adept::internal::NAME, R>\t\\\n  FUNC(const adept::Expression<Type, R>& r)\t{\t\t\t\\\n    return adept::internal::UnaryOperation<Type,\t\t\t\\\n\t\t\t\t   adept::internal::NAME, R>(r.cast()); \\\n  }\n  \n  // Operators\n  ADEPT_DEF_UNARY_OP(UnaryPlus,  operator+, +, \"+\", 1.0, true)\n  ADEPT_DEF_UNARY_OP(UnaryMinus, operator-, -, \"-\", -1.0, true)\n  ADEPT_DEF_UNARY_OP(Not,        operator!, !, \"!\", 0.0, false)\n\n\n  // ---------------------------------------------------------------------\n  // SECTION 3.4: Unary operations: transpose function [DELETED]\n  // ---------------------------------------------------------------------\n\n  // ---------------------------------------------------------------------\n  // SECTION 3.5: Unary operations: returning boolean expression\n  // ---------------------------------------------------------------------\n  namespace internal {\n\n    // Unary operations returning bool derive from this class, where\n    // Op is a policy class defining how to implement the operation,\n    // and R is the type of the argument of the operation\n    template <typename Type, template<class> class Op, class R>\n    struct UnaryBoolOperation\n      : public Expression<bool, UnaryBoolOperation<Type, Op, R> >,\n\tprotected Op<Type> {\n      \n      static const int  rank       = R::rank;\n      static const bool is_active  = false;\n      static const int  n_active   = 0;\n      static const int  n_scratch  = 0;\n      static const int  n_arrays   = R::n_arrays;\n      \n      using Op<Type>::operation;\n      using Op<Type>::operation_string;\n      \n      const R& arg;\n\n      UnaryBoolOperation(const Expression<Type, R>& arg_)\n\t: 
arg(arg_.cast()) { }\n      \n      template <int Rank>\n      bool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn arg.get_dimensions(dim);\n      }\n\n      std::string expression_string_() const {\n\tstd::string str;\n\tstr = operation_string();\n\tstr += \"(\" + static_cast<const R*>(&arg)->expression_string() + \")\";\n\treturn str;\n      }\n\n      bool is_aliased_(const bool* mem1, const bool* mem2) const {\n\treturn false;\n      }\n      bool all_arrays_contiguous_() const {\n\treturn arg.all_arrays_contiguous_(); \n      }\n      template <int n>\n      int alignment_offset_() const { return arg.template alignment_offset_<n>(); }\n\n      template <int Rank>\n      Type value_with_len_(Index i, Index len) const {\n\treturn operation(arg.value_with_len(i, len));\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\targ.template advance_location_<MyArrayNum>(loc);\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(arg.template value_at_location_<MyArrayNum>(loc));\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] \n\t  = operation(arg.template value_at_location_store_<MyArrayNum,MyScratchNum+1>(loc, scratch));\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <bool IsAligned,\tint MyArrayNum, typename PacketType,\n\tint NArrays>\n      PacketType values_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn operation(arg.template 
values_at_location_<IsAligned,MyArrayNum,PacketType>(loc));\n      }\n\n      template <bool UseStored, bool IsAligned,\tint MyArrayNum, int MyScratchNum,\n\t\ttypename PacketType, int NArrays, int NScratch>\n      PacketType values_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t   ScratchVector<NScratch,PacketType>& scratch) const {\n\treturn operation(arg.template values_at_location_<IsAligned,MyArrayNum,PacketType>(loc));\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch,\n\t\ttypename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const { }\n\n      template <bool IsAligned, int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t\tint NArrays, int NScratch, int NActive>\n      void calc_gradient_packet_(Stack& stack, \n\t\t\t\t const ExpressionSize<NArrays>& loc,\n\t\t\t\t const ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t\t ScratchVector<NActive,Packet<Real> >& gradients) const {}\n\n      template <bool IsAligned, int MyArrayNum, int MyScratchNum, int MyActiveNum,\n\t\tint NArrays, int NScratch, int NActive, typename MyType>\n      void calc_gradient_packet_(Stack& stack, \n\t\t\t\t const ExpressionSize<NArrays>& loc,\n\t\t\t\t const ScratchVector<NScratch,Packet<Real> >& scratch,\n\t\t\t\t ScratchVector<NActive,Packet<Real> >& gradients,\n\t\t\t\t const MyType& multiplier) const {}\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\targ.template set_location_<MyArrayNum>(i, index);\n      }\n\n    };\n  \n  } // End namespace 
internal\n\n#define ADEPT_DEF_UNARY_BOOL_FUNC(NAME, FUNC, RAWFUNC)\t\t\\\n  namespace internal {\t\t\t\t\t\t\\\n    template <typename Type>\t\t\t\t\t\\\n    struct NAME  {\t\t\t\t\t\t\\\n      const char* operation_string() const { return #FUNC; }\t\\\n      bool operation(const Type& val) const {\t\t\t\\\n\tusing RAWFUNC;\t\t\t\t\t\t\\\n\treturn FUNC(val); /* RAWFUNC(val); */\t\t\t\\\n      }\t\t\t\t\t\t\t\t\\\n    };\t\t\t\t\t\t\t\t\\\n  } /* End namespace internal */\t\t\t\t\t\\\n  template <class Type, class R>\t\t\t\t\t\\\n  inline\t\t\t\t\t\t\t\t\\\n  adept::internal::UnaryBoolOperation<Type, adept::internal::NAME, R>\t\\\n  FUNC(const adept::Expression<Type, R>& r){\t\t\t\t\\\n    return adept::internal::UnaryBoolOperation<Type,\t\t\t\\\n      adept::internal::NAME, R>(r.cast());\t\t\t\t\\\n  }\n\n  ADEPT_DEF_UNARY_BOOL_FUNC(IsNan,    isnan,    std::isnan)\n  ADEPT_DEF_UNARY_BOOL_FUNC(IsInf,    isinf,    std::isinf)\n  ADEPT_DEF_UNARY_BOOL_FUNC(IsFinite, isfinite, std::isfinite)\n\n  //#undef ADEPT_DEF_UNARY_BOOL_FUNC\n\n} /* End namespace adept */\n\n\n\n#endif\n"
  },
  {
    "path": "include/adept/array_shortcuts.h",
    "content": "/* array_shortcuts.h -- Definitions of \"shortcut\" typedefs for array types\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptArrayShortcuts_H\n#define AdeptArrayShortcuts_H\n\n#include <adept/Array.h>\n#include <adept/SpecialMatrix.h>\n#include <adept/FixedArray.h>\n\nnamespace adept {\n\n  // ---------------------------------------------------------------------\n  // Pretty typedefs to avoid the need for template arguments\n  // ---------------------------------------------------------------------\n\n  typedef Array<1> Vector;\n  typedef Array<2> Matrix;\n  typedef Array<3> Array3; // Deprecated\n  typedef Array<3> Array3D;\n  typedef Array<4> Array4D;\n  typedef Array<5> Array5D;\n  typedef Array<6> Array6D;\n  typedef Array<7> Array7D;\n\n  typedef Array<1,Index> IntVector;\n  typedef Array<2,Index> IntMatrix;\n  typedef Array<3,Index> IntArray3; // Deprecated\n  typedef Array<3,Index> IntArray3D;\n\n  typedef Array<1,int> intVector;\n  typedef Array<2,int> intMatrix;\n  typedef Array<3,int> intArray3; // Deprecated\n  typedef Array<3,int> intArray3D;\n  typedef Array<4,int> intArray4D;\n  typedef Array<5,int> intArray5D;\n  typedef Array<6,int> intArray6D;\n  typedef Array<7,int> intArray7D;\n\n  typedef Array<1,bool> boolVector;\n  typedef Array<2,bool> boolMatrix;\n  typedef Array<3,bool> boolArray3; // Deprecated\n  typedef Array<3,bool> boolArray3D;\n  typedef Array<4,bool> boolArray4D;\n  typedef Array<5,bool> boolArray5D;\n  typedef Array<6,bool> boolArray6D;\n  typedef Array<7,bool> boolArray7D;\n\n  typedef Array<1,float> floatVector;\n  typedef Array<2,float> floatMatrix;\n  typedef Array<3,float> floatArray3; // Deprecated\n  typedef Array<3,float> floatArray3D;\n  typedef Array<4,float> floatArray4D;\n  typedef Array<5,float> floatArray5D;\n  typedef Array<6,float> floatArray6D;\n  
typedef Array<7,float> floatArray7D;\n\n  typedef SpecialMatrix<Real,internal::SquareEngine<ROW_MAJOR>,\n    false> SquareMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,0,0>,\n    false> DiagMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,1,1>,\n    false> TridiagMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,2,2>,\n    false> PentadiagMatrix;\n  typedef SpecialMatrix<Real,internal::SymmEngine<ROW_LOWER_COL_UPPER>,\n    false> SymmMatrix;\n  typedef SpecialMatrix<Real,internal::LowerEngine<ROW_MAJOR>,\n    false> LowerMatrix;\n  typedef SpecialMatrix<Real,internal::UpperEngine<ROW_MAJOR>,\n    false> UpperMatrix;\n\n  typedef FixedArray<Real,false,2> Vector2;\n  typedef FixedArray<Real,false,3> Vector3;\n  typedef FixedArray<Real,false,4> Vector4;\n  typedef FixedArray<Real,false,2,2> Matrix22;\n  typedef FixedArray<Real,false,3,3> Matrix33;\n  typedef FixedArray<Real,false,4,4> Matrix44;\n\n  // If automatic differentiation is turned off then aVector and\n  // friends become identical to their inactive counterparts\n#ifdef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n#define ADEPT_IS_ACTIVE false\n#else\n#define ADEPT_IS_ACTIVE true\n#endif\n\n  typedef Array<1,Real,ADEPT_IS_ACTIVE> aVector;\n  typedef Array<2,Real,ADEPT_IS_ACTIVE> aMatrix;\n  typedef Array<3,Real,ADEPT_IS_ACTIVE> aArray3; // Deprecated\n  typedef Array<3,Real,ADEPT_IS_ACTIVE> aArray3D;\n  typedef Array<4,Real,ADEPT_IS_ACTIVE> aArray4D;\n  typedef Array<5,Real,ADEPT_IS_ACTIVE> aArray5D;\n  typedef Array<6,Real,ADEPT_IS_ACTIVE> aArray6D;\n  typedef Array<7,Real,ADEPT_IS_ACTIVE> aArray7D;\n\n  typedef SpecialMatrix<Real,internal::SquareEngine<ROW_MAJOR>,\n    ADEPT_IS_ACTIVE> aSquareMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,0,0>,\n    ADEPT_IS_ACTIVE> aDiagMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,1,1>,\n    ADEPT_IS_ACTIVE> aTridiagMatrix;\n  typedef 
SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,2,2>,\n    ADEPT_IS_ACTIVE> aPentadiagMatrix;\n  typedef SpecialMatrix<Real,internal::SymmEngine<ROW_LOWER_COL_UPPER>,\n    ADEPT_IS_ACTIVE> aSymmMatrix;\n  typedef SpecialMatrix<Real,internal::LowerEngine<ROW_MAJOR>,\n    ADEPT_IS_ACTIVE> aLowerMatrix;\n  typedef SpecialMatrix<Real,internal::UpperEngine<ROW_MAJOR>,\n    ADEPT_IS_ACTIVE> aUpperMatrix;\n\n  typedef FixedArray<Real,ADEPT_IS_ACTIVE,2>   aVector2;\n  typedef FixedArray<Real,ADEPT_IS_ACTIVE,3>   aVector3;\n  typedef FixedArray<Real,ADEPT_IS_ACTIVE,4>   aVector4;\n  typedef FixedArray<Real,ADEPT_IS_ACTIVE,2,2> aMatrix22;\n  typedef FixedArray<Real,ADEPT_IS_ACTIVE,3,3> aMatrix33;\n  typedef FixedArray<Real,ADEPT_IS_ACTIVE,4,4> aMatrix44;\n\n\n#undef ADEPT_IS_ACTIVE\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/base.h",
    "content": "/* base.h -- Basic definitions \n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015-2021 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n\n#ifndef AdeptBase_H\n#define AdeptBase_H 1\n\n#include <cstddef>\n\n\n// ---------------------------------------------------------------------\n// 0: Adept version number\n// ---------------------------------------------------------------------\n\n// The version of the Adept library is specified both as a string and\n// an integer\n#define ADEPT_VERSION      20100\n#define ADEPT_VERSION_STR \"2.1\"\n\n\n// ---------------------------------------------------------------------\n// 1: Defines not requiring a library recompile\n// ---------------------------------------------------------------------\n\n// The following can either be changed here, or define them just\n// before including this header file in your code, or define using the\n// -Dxxx compiler option.  These options to not need the library to be\n// recompiled.\n\n// A globally accessible stack needs to be present for arithmetic\n// statements to access; by default this is thread safe but if you\n// know you are running a single-threaded application then slightly\n// faster performance may be achieved by defining this. Note that in\n// section 4 of this header file, ADEPT_STACK_THREAD_UNSAFE is\n// explicitly defined on the Mac OS platform, since the executable\n// format used typically does not support thread-local storage.\n//#define ADEPT_STACK_THREAD_UNSAFE 1\n\n// Define this to check whether the \"multiplier\" is zero before it is\n// placed on the operation stack. 
This makes the forward pass slower\n// and the reverse pass slightly faster, and is only worthwhile if\n// many reverse passes will be carried out per forward pass (or if you\n// have good reason to believe many variables in your code are zero).\n// #define ADEPT_REMOVE_NULL_STATEMENTS 1\n\n// If using the same code for both forward-only and\n// forward-and-reverse calculations, then it is useful to be able to\n// dynamically control whether or not gradient information is computed\n// by expressions in the forward pass using the pause_recording() and\n// continue_recording() functions. To enable this feature uncomment\n// the following, but note that it slows down the forward pass a\n// little.  \n//#define ADEPT_RECORDING_PAUSABLE 1\n\n// Initialize real types to signaling NaN or zero\n//#define ADEPT_INIT_REAL_SNAN 1\n//#define ADEPT_INIT_REAL_ZERO 1\n\n// Often when you first convert a code for automatic differentiation\n// the gradients computed contain NaNs or infinities: uncommenting the\n// following will check for these and throw an error when they are\n// found, so that by running the program in a debugger and looking at\n// the backtrace, you can locate the source.\n//#define ADEPT_TRACK_NON_FINITE_GRADIENTS 1\n\n// If this is defined then each mathematical operation does not\n// involve a check whether more memory needs to be allocated; rather\n// the user first specifies how much memory to allocate to hold the\n// entire algorithm via the preallocate_statements and\n// preallocate_operations functions. 
This is a little faster, but is\n// obviously risky if you don't anticipate correctly how much memory\n// will be needed.\n//#define ADEPT_MANUAL_MEMORY_ALLOCATION 1\n\n// Do we check array bounds when indexing arrays?\n//#define ADEPT_BOUNDS_CHECKING 1\n\n// Do we disable dimension checking when assigning an array expression\n// to another array?\n//#define ADEPT_NO_DIMENSION_CHECKING 1\n\n// Do we disable automatic alias checking in array operations?\n//#define ADEPT_NO_ALIAS_CHECKING 1\n\n// Does adept::exp when applied to Adept types such as arrays invoke a\n// faster vectorizable exponential function?  This is not bit\n// reproducible with \"exp\" in the standard library, but the faster\n// function is always available as adept::fastexp (and this also works\n// on scalars).  Note that when applied to an Adept type, a simple\n// \"exp\" selects the function from the adept namespace.\n//#define ADEPT_FAST_EXPONENTIAL 1\n\n// The following will define the adept::exp function for the scalar\n// types \"float\" and \"double\" to call the faster exponential function,\n// bit reproducible with the vectorizable one above.  However, this\n// can cause a namespace clash as some C header files import \"exp\"\n// outside of any namespace.  Alternatively you can use adept::fastexp\n// on scalars.\n//#define ADEPT_FAST_SCALAR_EXPONENTIAL 1\n\n// A shortcut for faster execution that does not change the behaviour\n// of single-threaded bug-free code that uses the \"eval\" function in\n// case of aliasing.  
ADEPT_FAST_EXPONENTIAL changes results so is not\n// activated wtih ADEPT_FAST.\n#ifdef ADEPT_FAST\n#define ADEPT_STACK_THREAD_UNSAFE 1\n#define ADEPT_NO_DIMENSION_CHECKING 1\n#define ADEPT_NO_ALIAS_CHECKING 1\n#endif\n\n// The compiler option -ffast-math turns on __FAST_MATH__ and allows\n// for optimizations that may not be bit-reproducible or do all the\n// normal error checking - Adept's fast exponential falls into this\n// category.\n#ifdef __FAST_MATH__\n#define ADEPT_FAST_EXPONENTIAL 1\n#endif\n\n// The initial size of the stacks, which can be grown if required\n#ifndef ADEPT_INITIAL_STACK_LENGTH\n#define ADEPT_INITIAL_STACK_LENGTH 1048576\n#endif\n\n// The statement and operation stacks\n#ifndef ADEPT_STACK_BLOCK_LENGTH\n#define ADEPT_STACK_BLOCK_LENGTH 1048576\n#endif\n\n//#define ADEPT_SUPPORT_HUGE_ARRAYS 1\n\n// Since subsetting an array causes a modification to the reference\n// counter in the underlying storage object, multiple threads\n// subsetting the same array can cause clashes unless the reference\n// counter is protected by a mutex. This is possible on C++11 by\n// making the reference counter of type std::atomic<int>, enabled by\n// defining the following:\n//#define ADEPT_STORAGE_THREAD_SAFE\n\n\n// ---------------------------------------------------------------------\n// 2: Defines requiring a library recompile\n// ---------------------------------------------------------------------\n\n// The \"stack\" containing derivative information can be implemented in\n// two ways: if ADEPT_STACK_STORAGE_STL is defined then C++ STL\n// containers are used, otherwise dynamically allocated arrays are\n// used.  Experience says that dynamically allocated arrays are faster.\n//#define ADEPT_STACK_STORAGE_STL 1\n\n// The number of rows/columns of a Jacobian that are calculated at\n// once. 
The optimum value depends on platform, the size of your\n// Jacobian and the number of OpenMP threads available.\n#ifndef ADEPT_MULTIPASS_SIZE\n//#define ADEPT_MULTIPASS_SIZE 1\n//#define ADEPT_MULTIPASS_SIZE 2\n#define ADEPT_MULTIPASS_SIZE 4\n//#define ADEPT_MULTIPASS_SIZE 8\n//#define ADEPT_MULTIPASS_SIZE 15\n//#define ADEPT_MULTIPASS_SIZE 16\n//#define ADEPT_MULTIPASS_SIZE 32\n//#define ADEPT_MULTIPASS_SIZE 64\n#endif\n\n// If ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK then the\n// Jacobian calculation will try to remove redundant loops involving\n// zeros; note that this may inhibit auto-vectorization\n#define ADEPT_MULTIPASS_SIZE_ZERO_CHECK 64\n#define PACKET_SIZE_ZERO_CHECK 64\n\n// By default the precision of differentiated expressions is \"double\".\n// To override this, define ADEPT_REAL_TYPE_SIZE to 4 (float), 8\n// (double) or 16 (long double). Note that if you specify 16 but on\n// your system \"long double\" is actually the same as double, then the\n// code will fail to compile.\n//#define ADEPT_REAL_TYPE_SIZE 8\n\n// Thread-local storage is used for the global Stack pointer to ensure\n// thread safety.  In pre-C++11 compilers, thread-local variables are\n// declared in different ways by different compilers, the most common\n// ones being detected in section 4 below.  Some platforms\n// (particularly some Mac platforms) do not implement thread-local\n// storage, and therefore on Mac thread-local storage is disabled. If\n// you want to manually specify how thread-local storage is declared,\n// you may do it here.  
If thread-local storage is not available on\n// your platform but is not detected in section 4, and consequently\n// you cannot get the code to compile, then you can make an empty\n// declaration here.\n//#define ADEPT_THREAD_LOCAL thread_local\n\n// Define the following if you wish to use OpenMP to accelerate array\n// expressions\n//#define ADEPT_OPENMP_ARRAY_OPERATIONS 1\n\n// This cannot be changed without rewriting the Adept library\n#define ADEPT_MAX_ARRAY_DIMENSIONS 7\n\n// ---------------------------------------------------------------------\n// 4: Miscellaneous\n// ---------------------------------------------------------------------\n\n// Various C++11 features\n#if __cplusplus > 199711L\n// We can optimize the returning of Arrays from functions with move\n// semantics:\n#define ADEPT_MOVE_SEMANTICS 1\n// Other C++11 features such as initializer lists, thread_local\n// keyword, extra mathematical functions etc:\n#define ADEPT_CXX11_FEATURES 1\n#elif defined(_MSVC_LANG)\n// Microsoft will only update __cplusplus when all C++11 features are\n// included\n#if _MSVC_LANG > 199711L\n#define ADEPT_MOVE_SEMANTICS 1\n#define ADEPT_CXX11_FEATURES 1\n#endif\n#endif\n\n// Check C++11 is being used if thread-safe array storage is required\n#ifdef ADEPT_STORAGE_THREAD_SAFE\n#ifndef ADEPT_CXX11_FEATURES\n#error \"Thread-safe array storage is only available with C++11\"\n#endif\n#endif\n\n// The following attempt to align the data to facilitate SSE2\n// vectorization did not work so is disabled\n#ifdef __GNUC__\n//#define ADEPT_SSE2_ALIGNED __attribute__ ((aligned (16)))\n#define ADEPT_SSE2_ALIGNED\n#else\n#define ADEPT_SSE2_ALIGNED\n#endif\n\n// The way thread-local variables are specified pre-C++11 is compiler\n// specific.  
You can specify this manually by defining the\n// ADEPT_THREAD_LOCAL preprocessor variable in the previous section,\n// otherwise it is defined here depending on your compiler\n#ifndef ADEPT_THREAD_LOCAL\n  #ifdef __APPLE__\n    #ifdef __GNUC__\n      // GNU C++11 compiler on Mac should support thread_local\n      #ifdef ADEPT_CXX11_FEATURES\n        #define ADEPT_THREAD_LOCAL thread_local\n      #endif\n    #elif defined(__has_feature)\n      // Clang supports \"__has_feature\": check if thread_local is\n      // available\n     #if __has_feature(cxx_thread_local)\n        #define ADEPT_THREAD_LOCAL thread_local\n      #endif\n    #endif\n    // When thread_local is unavailable we turn it off and provide a\n    // blank definition of ADEPT_THREAD_LOCAL.\n    #ifndef ADEPT_THREAD_LOCAL\n      #define ADEPT_STACK_THREAD_UNSAFE 1\n      #define ADEPT_THREAD_LOCAL\n    #endif\n  #elif defined(ADEPT_CXX11_FEATURES)\n    // C++11 has thread_local as part of the language, and should be\n    // supported on non-Mac C++11 platforms\n    #define ADEPT_THREAD_LOCAL thread_local\n  #elif defined(_MSC_VER)\n    // Microsoft C++98 has a different way to specify thread-local\n    // storage from the GCC/Intel/Sun/IBM compilers.\n    #define ADEPT_THREAD_LOCAL __declspec(thread)\n  #else\n    // The following should work on GCC/Intel/Sun/IBM C++98 compilers\n    #define ADEPT_THREAD_LOCAL __thread\n  #endif\n#endif\n\n// If we use OpenMP to parallelize array expressions then some\n// variables local to active operation structures (Multiply etc) need\n// to be made thread-local\n#ifdef ADEPT_OPENMP_ARRAY_OPERATIONS\n#define ADEPT_THREAD_LOCAL_IF_OPENMP ADEPT_THREAD_LOCAL\n#else\n#define ADEPT_THREAD_LOCAL_IF_OPENMP\n#endif\n\n// Currently the design of the stack means that automatic\n// differentiation of matrix multiplication is very inefficient. 
A\n// future version of Adept will redesign the stack to store directives\n// enabling efficient implementation of the derivative of a matrix\n// multiplication, and this will be applicable to different types of\n// matrix (dense, symmetric, banded, upper and lower). But for now,\n// only differentiation of dense active matrices\n// (i.e. Array<2,Real,true>) is implemented.  Therefore other types\n// of active matrix need to be converted to this type before they can\n// be used in matrix multiplication.\n#define ADEPT_ONLY_DIFFERENTIATE_DENSE_MATRIX_MULTIPLICATION 1\n\n// To find bugs it can be useful to initialize arrays to signaling\n// NaNs, in which case ADEPT_INIT_REAL is set and used internally\n#ifdef ADEPT_INIT_REAL_SNAN\n#define ADEPT_INIT_REAL std::numeric_limits<T>::signaling_NaN()\n#elif defined(ADEPT_INIT_REAL_ZERO)\n#define ADEPT_INIT_REAL 0.0\n#endif\n\n// ---------------------------------------------------------------------\n// 5: Define basic floating-point and integer types\n// ---------------------------------------------------------------------\nnamespace adept {\n\n  // An older version of Adept used ADEPT_FLOATING_POINT_TYPE to\n  // define alternative underlying types for \"Real\", but unfortunately\n  // the preprocessor cannot check if a preprocessor variable is of\n  // type \"long double\", so a numerical value is used instead\n#ifdef ADEPT_FLOATING_POINT_TYPE\n#undef ADEPT_FLOATING_POINT_TYPE\n#error ADEPT_FLOATING_POINT_TYPE is deprecated: use ADEPT_REAL_TYPE_SIZE instead\n#endif\n\n#ifndef ADEPT_REAL_TYPE_SIZE\n#define ADEPT_REAL_TYPE_SIZE 8\n#endif\n\n#if ADEPT_REAL_TYPE_SIZE == 4\n  typedef float Real;\n#elif ADEPT_REAL_TYPE_SIZE == 8\n  typedef double Real;\n#elif ADEPT_REAL_TYPE_SIZE == 16\n  typedef long double Real;\n#else\n#undef ADEPT_REAL_TYPE_SIZE\n#error If defined, ADEPT_REAL_TYPE_SIZE must be 4 (float), 8 (double) or 16 (long double)\n#endif\n\n  // By default sizes of arrays, indices to them, and indices in the\n  // 
automatic differentiation stack are stored as 4-byte integers,\n  // but for very large arrays and algorithms, larger types may be\n  // needed.  Remember that on 32-bit platforms this will have no\n  // effect.\n#ifdef ADEPT_SUPPORT_HUGE_ARRAYS\n  typedef std::size_t  uIndex; // Unsigned\n  typedef std::ptrdiff_t Index;  // Signed\n#else\n  //  typedef unsigned int uIndex;\n  typedef int uIndex;\n  typedef int Index;\n#endif\n\n  // ---------------------------------------------------------------------\n  // 6: Disable stupid warnings\n  // ---------------------------------------------------------------------\n\n#ifdef __INTEL_COMPILER\n// \"type qualifiers are meaningless here\"\n#pragma warning disable 2536\n#elif defined(_MSC_VER)\n// \"multiple copy constructors specified\"\n#pragma warning( disable : 4521 )\n#endif\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/contiguous_matrix.h",
    "content": "/* contiguous_matrix.h -- Return matrix with contiguous storage\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptContiguousMatrix_H\n#define AdeptContiguousMatrix_H 1\n\n#include <adept/Array.h>\n\nnamespace adept {\n  namespace internal {\n    \n    // If for input into BLAS or LAPACK a matrix is required to have\n    // one dimension contiguous and increasing in memory, then call\n    // this function: if the matrix has this property then the\n    // returned matrix in \"out\" will be linked to the input matrix;\n    // otherwise, \"out\" will be a copy of \"in\" but satisfying this\n    // condition. The returned \"order\" is ROW_MAJOR or COL_MAJOR\n    // stating the storage type of the returned matrix.\n    template <typename T, bool IsActive>\n    MatrixStorageOrder contiguous_matrix(Array<2,T,IsActive>& in, \n\t\t\t\t\t Array<2,T,IsActive>& out,\n\t\t\t\t\t Index& stride) {\n      MatrixStorageOrder order = ROW_MAJOR;\n      if (in.empty()) {\n\tthrow(invalid_operation(\"Input matrix must not be empty\"));\n      }\n      if (in.dimension(1) == 1) {\n\tout.link(in);\n\tstride = in.offset(0);\n      }\n      else if (in.dimension(0) == 1) {\n\torder = COL_MAJOR;\n\tout.link(in);\n\tstride = in.offset(1);\n      }\n      else {\n\tout.resize_row_major(in.dimensions());\n\tout = in;\n\tstride = in.offset(0);\n      }\n      return order;\n    }\n\n    // As contiguous_matrix but checks that the input matrix is square\n    template <typename T, bool IsActive>\n    MatrixStorageOrder contiguous_square_matrix(Array<2,T,IsActive>& in, \n\t\t\t\t\t\tArray<2,T,IsActive>& out,\n\t\t\t\t\t\tIndex& stride) {\n      if (in.dimension(0) != in.dimension(1)) {\n\tthrow(invalid_operation(\"Square matrix required\"));\n      }\n      return contiguous_matrix(in, out, stride);\n    }\n\n  }\n}\n\n\n#endif\n"
  },
  {
    "path": "include/adept/cppblas.h",
    "content": "/* cppblas.h -- C++ interface to BLAS functions\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   This file provides a C++ interface to selected Level-2 and -3 BLAS\n   functions in which the precision of the arguments (float versus\n   double) is inferred via overloading\n\n*/\n\n#ifndef AdeptCppBlas_H\n#define AdeptCppBlas_H 1\n\nnamespace adept {\n\n  namespace internal {\n\n    typedef bool BLAS_ORDER;\n    typedef char BLAS_TRANSPOSE;\n    typedef char BLAS_UPLO;\n    typedef char BLAS_SIDE;\n\n    static const BLAS_ORDER     BlasRowMajor  = false;\n    static const BLAS_ORDER     BlasColMajor  = true;\n    static const BLAS_TRANSPOSE BlasNoTrans   = 'N';\n    static const BLAS_TRANSPOSE BlasTrans     = 'T';\n    static const BLAS_TRANSPOSE BlasConjTrans = 'C';\n    static const BLAS_UPLO      BlasUpper     = 'U';\n    static const BLAS_UPLO      BlasLower     = 'L';\n    static const BLAS_SIDE      BlasLeft      = 'L';\n    static const BLAS_SIDE      BlasRight     = 'R';\n\n    // Matrix-matrix multiplication for general dense matrices\n#define ADEPT_DEFINE_GEMM(T)\t\t\t\t\t\\\n    void cppblas_gemm(const BLAS_ORDER Order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const BLAS_TRANSPOSE TransB,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const int K, const T alpha, const T *A,\t\\\n\t\t      const int lda, const T *B, const int ldb,\t\\\n\t\t      const T beta, T *C, const int ldc);\n    ADEPT_DEFINE_GEMM(double)\n    ADEPT_DEFINE_GEMM(float)\n#undef ADEPT_DEFINE_GEMM\n    \n    // Matrix-vector multiplication for a general dense matrix\n#define ADEPT_DEFINE_GEMV(T)\t\t\t\t\t\\\n    void cppblas_gemv(const BLAS_ORDER order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const T alpha, const T *A, const int 
lda,\t\\\n\t\t      const T *X, const int incX, const T beta,\t\\\n\t\t      T *Y, const int incY);\n    ADEPT_DEFINE_GEMV(double)\n    ADEPT_DEFINE_GEMV(float)\n#undef ADEPT_DEFINE_GEMV\n    \n    // Matrix-matrix multiplication where matrix A is symmetric\n#define ADEPT_DEFINE_SYMM(T)\t\t\t\t\t\\\n    void cppblas_symm(const BLAS_ORDER Order,\t\t\t\\\n\t\t      const BLAS_SIDE Side,\t\t\t\\\n\t\t      const BLAS_UPLO Uplo,\t\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const T alpha, const T *A, const int lda,\t\\\n\t\t      const T *B, const int ldb, const T beta,\t\\\n\t\t      T *C, const int ldc);\n    ADEPT_DEFINE_SYMM(double)\n    ADEPT_DEFINE_SYMM(float)\n#undef ADEPT_DEFINE_SYMM\n    \n    // Matrix-vector multiplication where the matrix is symmetric\n#define ADEPT_DEFINE_SYMV(T)\t\t\t\t\t\\\n    void cppblas_symv(const BLAS_ORDER order,\t\t\t\\\n\t\t      const BLAS_UPLO Uplo,\t\t\t\\\n\t\t      const int N, const T alpha, const T *A,\t\\\n\t\t      const int lda, const T *X, const int incX,\\\n\t\t      const T beta, T *Y, const int incY);\n    ADEPT_DEFINE_SYMV(double)\n    ADEPT_DEFINE_SYMV(float)\n#undef ADEPT_DEFINE_SYMV\n    \n    // Matrix-vector multiplication for a general band matrix\n#define ADEPT_DEFINE_GBMV(T)\t\t\t\t\t\\\n    void cppblas_gbmv(const BLAS_ORDER order,\t\t\t\\\n\t\t      const BLAS_TRANSPOSE TransA,\t\t\\\n\t\t      const int M, const int N,\t\t\t\\\n\t\t      const int KL, const int KU, const T alpha,\\\n\t\t      const T *A, const int lda, const T *X,\t\\\n\t\t      const int incX, const T beta, T *Y,\t\\\n\t\t      const int incY);\n    ADEPT_DEFINE_GBMV(double)\n    ADEPT_DEFINE_GBMV(float)\n#undef ADEPT_DEFINE_GBMV\n\n  } // End namespace internal\n\n} // End namespace adept\n\n\n#endif\n"
  },
  {
    "path": "include/adept/eval.h",
    "content": "/* eval.h -- Convert expression to array to avoid aliasing issues\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptEval_H\n#define AdeptEval_H\n\n#include <adept/Array.h>\n\nnamespace adept {\n\n  // Copy an expression to an Array of the same rank, type and\n  // activeness\n  template <typename EType, class E>\n  typename internal::enable_if<(E::rank > 0), Array<E::rank,EType,E::is_active> >::type\n  eval(const Expression<EType,E>& e) {\n    Array<E::rank,EType,E::is_active> a;\n    a = e.cast();\n    return a;\n  }\n\n  // Equivalent for scalar expressions; not really needed\n  /*\n  template <typename EType, class E>\n  typename internal::enable_if<E::rank==0 && !E::is_active, EType>::type\n  eval(const Expression<EType,E>& e) {\n    return static_cast<EType>(e);\n  }\n\n  template <typename EType, class E>\n  typename internal::enable_if<E::rank==0 && E::is_active, Active<EType> >::type\n  eval(const Expression<EType,E>& e) {\n    return static_cast<Active<EType> >(e);\n  }\n  */\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/exception.h",
    "content": "/* exception.h -- Exceptions thrown by Adept library\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   Adept functions can throw exceptions that are all derived either\n   from the adept::autodiff_exception or adept::array_exception types,\n   themselves inherited from the adept::exception type.  All implement\n   the \"what()\" function to return an error message.\n\n*/\n\n#ifndef AdeptException_H\n#define AdeptException_H 1\n\n#include <exception>\n#include <string>\n#include <sstream>\n\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // adept::exception class from which all others are derived\n  // -------------------------------------------------------------------\n  class exception : public std::exception {\n  public:\n    virtual const char* what() const throw() { return message_.c_str(); }\n    virtual ~exception() throw() { }\n  protected:\n    std::string message_;\n  };\n\n  class feature_not_available : public adept::exception {\n  public:\n    feature_not_available(const std::string& message = \"Feature not available\")\n    { message_ = message; }\n  };\n\n  // -------------------------------------------------------------------\n  // autodiff_exception and child classes\n  // -------------------------------------------------------------------\n\n  // The autodiff_exception type is only used as a base for more\n  // specific exceptions\n  class autodiff_exception : public adept::exception { };\n\n  // Now we define the various specific autodiff exceptions that can\n  // be thrown.\n  class gradient_out_of_range : public autodiff_exception {\n  public:\n    gradient_out_of_range(const std::string& message \n\t  = \"Gradient index out of range: probably aReal objects have been created after a set_gradient(s) 
call\")\n    { message_ = message; }\n  };\n\n  class gradients_not_initialized : public autodiff_exception {\n  public:\n    gradients_not_initialized(const std::string& message \n\t      = \"Gradients not initialized: at least one call to set_gradient(s) is needed before a forward or reverse pass\")\n    { message_ = message; }\n  };\n\n  class stack_already_active : public autodiff_exception {\n  public:\n    stack_already_active(const std::string& message \n\t = \"Attempt to activate an adept::Stack when one is already active in this thread\")\n    { message_ = message; }\n  };\n\n  class dependents_or_independents_not_identified : public autodiff_exception {\n  public:\n    dependents_or_independents_not_identified(const std::string& message \n\t = \"Dependent or independent variables not identified before a Jacobian computation\")\n    { message_ = message; }\n  };\n\n  class wrong_gradient : public autodiff_exception {\n  public:\n    wrong_gradient(const std::string& message\n\t  = \"Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call\")\n    { message_ = message; }\n  };\n\n  class non_finite_gradient : public autodiff_exception {\n  public:\n    non_finite_gradient(const std::string& message\n\t= \"A non-finite gradient has been computed\")\n    { message_ = message; }\n  };\n\n\n  // -------------------------------------------------------------------\n  // array_exception and child classes\n  // -------------------------------------------------------------------\n\n  // The array_exception type\n  class array_exception : public adept::exception { \n  public:\n    array_exception(const std::string& message\n\t\t    = \"A misuse of arrays occurred\")\n    { message_ = message; }\n  };\n\n  class size_mismatch : public array_exception {\n  public:\n    size_mismatch(const std::string& message\n\t\t  = \"Array sizes do not match in array expression\")\n    { message_ = message; 
}\n  };\n\n  class inner_dimension_mismatch : public array_exception {\n  public:\n    inner_dimension_mismatch(const std::string& message\n\t  = \"Inner dimensions don't agree in matrix multiplication\")\n    { message_ = message; }\n  };\n\n  class empty_array : public array_exception {\n  public:\n    empty_array(const std::string& message\n\t= \"Use of empty array where non-empty array required\")\n    { message_ = message; }\n  };\n\n  class invalid_dimension : public array_exception {\n  public:\n    invalid_dimension(const std::string& message\n\t= \"Attempt to create array with invalid dimension\")\n    { message_ = message; }\n  };\n\n  class index_out_of_bounds : public array_exception {\n  public:\n    index_out_of_bounds(const std::string& message\n\t= \"Array index is out of bounds\")\n    { message_ = message; }\n  };\n\n  class invalid_operation : public array_exception {\n  public:\n    invalid_operation(const std::string& message\n      = \"Operation not permitted for this type of array\")\n    { message_ = message; }\n  };\n\n  class matrix_ill_conditioned : public array_exception {\n  public:\n    matrix_ill_conditioned(const std::string& message\n      = \"Matrix ill conditioned\")\n    { message_ = message; }\n  };\n\n  class fortran_interoperability_error : public array_exception {\n  public:\n    fortran_interoperability_error(const std::string& message\n\t\t\t\t   = \"Fortran interoperability error\")\n    { message_ = message; }\n  };\n\n  \n  // -------------------------------------------------------------------\n  // optimization_exception\n  // -------------------------------------------------------------------\n\n  // The optimization_exception type\n  class optimization_exception : public adept::exception { \n  public:\n    optimization_exception(const std::string& message\n\t\t    = \"Optimization/minimization error\")\n    { message_ = message; }\n  };\n\n\n  // -------------------------------------------------------------------\n  
// Provide location of where exception was thrown\n  // -------------------------------------------------------------------\n\n  // The following enables the file name and line number to be reported\n  // with something like \n  //   throw array_exception(\"Bad matrix\" ADEPT_EXCEPTION_LOCATION)\n#define ADEPT_EXCEPTION_LOCATION \\\n  +adept::internal::exception_location(__FILE__,__LINE__)\n\n  // A string with location information to append to the error message\n  namespace internal {\n    inline\n    std::string exception_location(const char* file, int line) {\n      std::stringstream s;\n      s << \" (in \" << file << \":\" << line << \")\";\n      return s.str();      \n    }\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/interp.h",
    "content": "/* interp.h -- 1D interpolation\n\n    Copyright (C) 2015- European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptInterp_H\n#define AdeptInterp_H\n\n#include <adept/Array.h>\n\nnamespace adept {\n\n  namespace internal {\n    typedef unsigned int uint;\n  };\n  \n  // The interpolation scheme and extrapolation behaviours are passed\n  // in as one \"options\" argument with a bitwise OR. The lowest four\n  // bits specify the extrapolation policy and the remaining bits the\n  // interpolation scheme.\n  static const internal::uint ADEPT_INTERPOLATE_LINEAR  = 0u; // Default\n  static const internal::uint ADEPT_INTERPOLATE_NEAREST = (1u<<4);\n\n  static const internal::uint ADEPT_EXTRAPOLATE_DEFAULT  = 0u;\n  static const internal::uint ADEPT_EXTRAPOLATE_LINEAR   = 1u; // Default for linear interp \n  static const internal::uint ADEPT_EXTRAPOLATE_CLAMP    = 2u; // Default for nearest-neighbour\n  // Return a constant for out-of-bounds inputs, or NaN if the\n  // constant is not specified\n  static const internal::uint ADEPT_EXTRAPOLATE_CONSTANT = 3u;\n\n  // A bitwise AND of the \"options\" argument with one of the following\n  // will extract the component associated with interpolation and\n  // extrapolation\n  namespace internal {\n    static const internal::uint ADEPT_EXTRAPOLATE_MASK = 15; // Binary 1111\n    static const internal::uint ADEPT_INTERPOLATE_MASK = ~ADEPT_EXTRAPOLATE_MASK;\n\n    inline void extract_interp_extrap(uint options, uint& interp_scheme, uint& extrap_policy) {\n      interp_scheme = options & ADEPT_INTERPOLATE_MASK;\n      extrap_policy = options & ADEPT_EXTRAPOLATE_MASK;\n      if (interp_scheme != ADEPT_INTERPOLATE_LINEAR\n\t  && interp_scheme != ADEPT_INTERPOLATE_NEAREST) {\n\tthrow array_exception(\"Interpolation scheme not understood\");\n      }\n      else if (extrap_policy > ADEPT_EXTRAPOLATE_CONSTANT) 
{\n\tthrow array_exception(\"Extrapolation policy not understood\");\n      }\n      else if (interp_scheme == ADEPT_INTERPOLATE_NEAREST\n\t       && extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\tthrow array_exception(\"Linear extrapolation not available with nearest-neighbour interpolation\");\n      }\n      else if (extrap_policy == ADEPT_EXTRAPOLATE_DEFAULT) {\n\tif (interp_scheme == ADEPT_INTERPOLATE_LINEAR) {\n\t  extrap_policy = ADEPT_EXTRAPOLATE_LINEAR;\n\t}\n\telse {\n\t  extrap_policy = ADEPT_EXTRAPOLATE_CLAMP;\n\t}\n      }\n    }\n\n    // The dimensions of an array containing the data to be\n    // interpolated may be described either by a vector of real\n    // numbers, or by a regular range; any other type will not\n    // compile.  A regular range (which could be expressed by a\n    // LinSpace object) has not yet been defined.\n    template <typename T>\n    struct InterpHelper {\n      static const bool is_valid = false;\n    };\n\n    // Specialization for a vector of real numbers    \n    template <typename XType>\n    struct InterpHelper<Array<1,XType,false> > {\n      static const bool is_valid = is_floating_point<XType>::value;\n      template <typename XiType>\n      static void interp_get_indices_weights(const Array<1,XType,false>& x,\n\t\t\t\t const Array<1,XiType,false>& xi,\n\t\t\t\t internal::uint interp_scheme,\n\t\t\t\t internal::uint extrap_policy,\n\t\t\t\t Array<1,Index>& ind0, Array<1,Real,false>& weight0,\n\t\t\t\t Array<1,bool>& is_valid) {\n\tif (x(1) > x(0)) {\n\t  // Normal ordering; loop over points to be interpolated\n\t  for (Index i = 0; i < xi.size(); ++i) {\n\t    const XiType xii = xi(i);\n\t    if (xii >= x(0) && xii <= x(end)) {\n\t      // Point is in the range of the interpolated function\n\t      Index jj = 0;\n\t      while (jj < x.size()-2 && x(jj+1) < xii) {\n\t\t++jj;\n\t      }\n\t      ind0(i) = jj;\n\t      weight0(i) = (x(jj+1)-xii)/(x(jj+1)-x(jj));\n\t    }\n\t    else if (xii < x(0)) {\n\t      // 
Point is off the low end of the scale\n\t      ind0(i) = 0;\n\t      if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t\tweight0(i) = (x(1)-xii)/(x(1)-x(0));\n\t      }\n\t      else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t\tweight0(i) = 1.0;\n\t      }\n\t      else {\n\t\tis_valid(i) = false;\n\t      }\n\t    }\n\t    else {\n\t      // Point is off the high end of the scale\n\t      ind0(i) = x.size()-2;\n\t      if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t\tweight0(i) = (x(end)-xii)/(x(end)-x(end-1));\n\t      }\n\t      else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t\tweight0(i) = 0.0;\n\t      }\n\t      else {\n\t\tis_valid(i) = false;\n\t      }\n\t    }\n\t  }\n\t}\n\telse {\n\t  // Reverse ordering; loop over points to be interpolated\n\t  for (Index i = 0; i < xi.size(); ++i) {\n\t    const XiType xii = xi(i);\n\t    if (xii <= x(0) && xii >= x(end)) {\n\t      // Point is in the range of the interpolated function\n\t      Index jj = x.size()-2;\n\t      while (jj > 0 && x(jj) < xii) {\n\t\t--jj;\n\t      }\n\t      ind0(i) = jj;\n\t      weight0(i) = (x(jj+1)-xii)/(x(jj+1)-x(jj));\n\t    }\n\t    else if (xii > x(0)) {\n\t      // Point is off the scale (high in x, low in index)\n\t      ind0(i) = 0;\n\t      if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t\tweight0(i) = (x(1)-xii)/(x(1)-x(0));\n\t      }\n\t      else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t\tweight0(i) = 1.0;\n\t      }\n\t      else {\n\t\tis_valid(i) = false;\n\t      }\n\t    }\n\t    else {\n\t      // Point is off the scale (low in x, high in index)\n\t      ind0(i) = x.size()-2;\n\t      if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t\tweight0(i) = (x(end)-xii)/(x(end)-x(end-1));\n\t      }\n\t      else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t\tweight0(i) = 0.0;\n\t      }\n\t      else {\n\t\tis_valid(i) = false;\n\t      }\n\t    }\t    \n\t  }\n\t}\n\t// Not very efficient implementation of nearest-neighbour\n\t// 
interpolation: round the weights from linear interpolation\n\tif (interp_scheme == ADEPT_INTERPOLATE_NEAREST) {\n\t  weight0 = round(weight0);\n\t}\n      }\n    };\n  }\n  \n  // 1D interpolation: interp1(x,y,xi) interpolates to obtain values of\n  // y (whose first dimension is at the points in vector x)\n  // interpolated to the values in vector xi. If y has more than one\n  // dimension then multiple values are interpolated for every point\n  // in xi, and the returned array has a size equal to y except that\n  // the first dimension is of the same length as xi. If the\n  // extrapolate policy is specified and is ADEPT_EXTRAPOLATE_CLAMP\n  // then values outside the range will be clampted at the first or\n  // last point. If it is ADEPT_EXTRAPOLATE_CONSTANT then a constant\n  // value will be used which can be specified as the final argument,\n  // or is a signaling NaN by default.  Otherwise, linear\n  // extrapolation is performed (the default). Note that x and xi must\n  // be inactive variables, but y can be active in which case the\n  // returned array will be too.\n  template <typename XType, typename YType, bool YIsActive, typename XiType, int YDims>\n  Array<YDims,YType,YIsActive>\n  interp(const Array<1,XType,false>& x,\n\t const Array<YDims,YType,YIsActive>& y,\n\t const Array<1,XiType,false>& xi,\n\t internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT,\n\t YType extrap_value = std::numeric_limits<YType>::signaling_NaN()) {\n    \n    ExpressionSize<YDims> ans_dims = y.dimensions();\n    ans_dims[0] = xi.size();\n    Array<YDims,YType,YIsActive> ans(ans_dims);\n    if (x.size() != y.size(0)) {\n      throw(size_mismatch(\"Interpolation vector x must have same length of first dimension of y in interp\"));\n    }\n    else if (x.size() == 0) {\n      throw(size_mismatch(\"Interpolation from empty vectors\"));\n    }\n    else if (x.size() == 1) {\n      // Input arrays are at a single point: copy this point into all\n      // 
output points regardless of their x coordinate\n      for (int ii = 0; ii < xi.size(); ++ii) {\n\tans[ii] = y[0];\n      }\n      return ans;\n    }\n\n    internal::uint interp_scheme, extrap_policy;\n    internal::extract_interp_extrap(options, interp_scheme, extrap_policy);\n    \n    if (x(0) < x(1)) {\n      // Normal ordering\n      for (Index i = 0; i < xi.size(); i++) {\n\tReal xii = xi(i);\n\tIndex jmin = 0;\n\tIndex jmax = x.size()-1;\n\tif (xii <= x(0)) {\n\t  if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t    // Extrapolate leftwards\n\t    jmax = 1;\n\t  }\n\t  else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t    // Clamp at first value\n\t    ans[i] = y[0];\n\t    continue;\n\t  }\n\t  else {\n\t    ans[i] = extrap_value;\n\t    continue;\n\t  }\n\t}\n\telse if (xii >= x(jmax)) {\n\t  if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t    // Extrapolate rightwards\n\t    jmin = jmax-1;\n\t  }\n\t  else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t    // Clamp at final value\n\t    ans[i] = y[jmax];\n\t    continue;\n\t  }\n\t  else {\n\t    ans[i] = extrap_value;\n\t    continue;\n\t  }\n\t}\n\telse {\n\t  // xii lies within x\n\t  // Find pair in which xi sits\n\t  while (jmax > jmin+1) {\n\t    Index jmid = jmin + (jmax-jmin)/2;\n\t    if (xii > x(jmid)) {\n\t      jmin = jmid;\n\t    }\n\t    else {\n\t      jmax = jmid;\n\t    }\n\t  }\n\t}\n\tif (interp_scheme == ADEPT_INTERPOLATE_LINEAR) {\n\t  // Found value: linearly interpolate. 
Note that we need\n\t  // square brackets here because ans and y may have more than\n\t  // one dimension in which case we want to slice them\n\t  // returning a lower dimensional array\n\t  ans[i] = ((xii-x(jmin))*y[jmax] + (x(jmax)-xii)*y[jmin])\n\t    / (x(jmax)-x(jmin));\n\t}\n\telse if (xii-x(jmin) > x(jmax)-xii) {\n\t  // Nearest neighbour is at next point\n\t  ans[i] = y[jmax];\n\t}\n\telse {\n\t  // Nearest neighbour is at previous point\n\t  ans[i] = y[jmin];\n\t}\n      }\n    }\n    else {\n      // Reverse ordering\n      for (Index i = 0; i < xi.size(); i++) {\n\tReal xii = xi(i);\n\tIndex jmin = 0;\n\tIndex jmax = x.size()-1;\n\tif (xii >= x(0)) {\n\t  if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t    // Extrapolate leftwards\n\t    jmax = 1;\n\t  }\n\t  else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t    // Clamp at first value\n\t    ans[i] = y[0];\n\t    continue;\n\t  }\n\t  else {\n\t    ans[i] = extrap_value;\n\t    continue;\n\t  }\n\t}\n\telse if (xii <= x(jmax)) {\n\t  if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) {\n\t    // Extrapolate rightwards\n\t    jmin = jmax-1;\n\t  }\n\t  else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) {\n\t    // Clamp at last value\n\t    ans[i] = y[jmax];\n\t    continue;\n\t  }\n\t  else {\n\t    ans[i] = extrap_value;\n\t    continue;\n\t  }\n\t}\n\telse {\n\t  // xii lies within x\n\t  // Find pair in which xi sits\n\t  while (jmax > jmin+1) {\n\t    Index jmid = jmin + (jmax-jmin)/2;\n\t    if (xii < x(jmid)) {\n\t      jmin = jmid;\n\t    }\n\t    else {\n\t      jmax = jmid;\n\t    }\n\t  }\n\t}\n\tif (interp_scheme == ADEPT_INTERPOLATE_LINEAR) {\n\t  // Found value: linearly interpolate (all weights here are\n\t  // negative)\n\t  ans[i] = ((xii-x(jmin))*y[jmax] + (x(jmax)-xii)*y[jmin])\n\t    / (x(jmax)-x(jmin));\n\t}\n\telse if (xii-x(jmin) < x(jmax)-xii) {\n\t  // Nearest neighbour is at next point\n\t  ans[i] = y[jmax];\n\t}\n\telse {\n\t  // Nearest neighbour is at previous point\n\t 
 ans[i] = y[jmin];\n\t}\n      }\n    }\n    return ans;\n  }\n\n  // Ensure that 1D interpolation works if expressions are provided\n  // for any of the arguments; these are converted to temporary\n  // arrays.\n  template <typename XType, typename YType, typename XiType,\n\t    class X, class Y, class Xi>\n  Array<Y::rank,YType,Y::is_active>\n  interp(const Expression<XType,X>& x,\n\t const Expression<YType,Y>& y,\n\t const Expression<XiType,Xi>& xi,\n\t internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT,\n\t YType extrap_value = std::numeric_limits<YType>::signaling_NaN()) {\n    const Array<1,XType,false> x2(x.cast());\n    const Array<Y::rank,YType,Y::is_active> y2(y.cast());\n    const Array<1,XiType,false> xi2(xi.cast());\n    return interp(x2, y2, xi2, options, extrap_value);\n  }\n\n  // 1D logarithmic interpolation: interpolate log(Y) and then\n  // exponentiate the result.\n  template <typename XType, typename YType, bool YIsActive, typename XiType>\n  Array<1,YType,YIsActive>\n  log_interp(const Array<1,XType,false>& x,\n\t const Array<1,YType,YIsActive>& y,\n\t const Array<1,XiType,false>& xi) {\n    using std::exp;\n    using std::log;\n\n    int length = xi.size();\n    Array<1,YType,YIsActive> ans(length);\n    if (x.size() != y.size()) {\n      throw(size_mismatch(\"Interpolation vectors must be the same length in log_interp\"));\n    }\n\n    if (x(0) < x(1)) {\n      // Normal ordering\n      for (Index i = 0; i < length; i++) {\n\tReal xii = xi(i);\n\tIndex jmin = 0;\n\tIndex jmax = x.size()-1;\n\tif (xii <= x(0)) {\n\t  // Extrapolate leftwards\n\t  jmax = 1;\n\t}\n\telse if (xii >= x(jmax)) {\n\t  // Extrapolate rightwards\n\t  jmin = jmax-1;\n\t}\n\telse {\n\t  // xii lies within x\n\t  // Find pair in which xi sits\n\t  while (jmax > jmin+1) {\n\t    Index jmid = jmin + (jmax-jmin)/2;\n\t    if (xii > x(jmid)) {\n\t      jmin = jmid;\n\t    }\n\t    else {\n\t      jmax = jmid;\n\t    }\n\t  }\n\t}\n\t// Found 
value: logarithmically interpolate\n\tif (y(jmax) > 0.0 && y(jmin) > 0.0) {\n\t  YType log_y_jmax = log(y(jmax));\n\t  YType log_y_jmin = log(y(jmin));\n\t  ans(i) = exp(((xii-x(jmin))*log_y_jmax + (x(jmax)-xii)*log_y_jmin)\n\t\t       / (x(jmax)-x(jmin)));\n\t}\n\telse {\n\t  // Interpolate linearly since one or both values is zero\n\t  ans(i) = ((xii-x(jmin))*y(jmax) + (x(jmax)-xii)*y(jmin))\n\t    / (x(jmax)-x(jmin));\n\t}\n      }\n    }\n    else {\n      // Reverse ordering\n      for (Index i = 0; i < length; i++) {\n\tReal xii = xi(i);\n\tIndex jmin = 0;\n\tIndex jmax = x.size()-1;\n\tif (xii >= x(0)) {\n\t  // Extrapolate leftwards\n\t  jmax = 1;\n\t}\n\telse if (xii <= x(jmax)) {\n\t  // Extrapolate rightwards\n\t  jmin = jmax-1;\n\t}\n\telse {\n\t  // xii lies within x\n\t  // Find pair in which xi sits\n\t  while (jmax > jmin+1) {\n\t    Index jmid = jmin + (jmax-jmin)/2;\n\t    if (xii < x(jmid)) {\n\t      jmin = jmid;\n\t    }\n\t    else {\n\t      jmax = jmid;\n\t    }\n\t  }\n\t}\n\t// Found value: logarithmically interpolate\n\tif (y(jmax) > 0.0 && y(jmin) > 0.0) {\n\t  YType log_y_jmax = log(y(jmax));\n\t  YType log_y_jmin = log(y(jmin));\n\t  ans(i) = exp(((xii-x(jmin))*log_y_jmax + (x(jmax)-xii)*log_y_jmin)\n\t\t       / (x(jmax)-x(jmin)));\n\t}\n\telse {\n\t  // Interpolate linearly since one or both values is zero\n\t  ans(i) = ((xii-x(jmin))*y(jmax) + (x(jmax)-xii)*y(jmin))\n\t    / (x(jmax)-x(jmin));\n\t}\n      }\n    }\n    return ans;\n  }\n\n  // 2D interpolation: as 1D interpolation but with two vectors\n  // describing the dimensions of the interpolation array and two\n  // vectors providing points at which interpolated values are\n  // required\n  template <typename XType, typename YType,\n\t    int MDims, typename MType, bool MIsActive,\n\t    typename XiType, typename YiType>\n  Array<MDims-1,MType,MIsActive>\n  interp2d(const XType& x,\n\t   const YType& y,\n\t   const Array<MDims,MType,MIsActive>& M,\n\t   const 
Array<1,XiType,false>& xi,\n\t   const Array<1,YiType,false>& yi,\n\t   internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT,\n\t   MType extrap_value = std::numeric_limits<MType>::signaling_NaN()) {\n\n    ADEPT_STATIC_ASSERT(MDims >= 2, TWO_DIMENSIONAL_INTERPOLATION_REQUIRES_2D_ARRAY);\n    \n    if (x.size() != M.size(0)) {\n      throw(size_mismatch(\"Interpolation vector x must have same length as first dimension of M in interp2d\"));\n    }\n    if (y.size() != M.size(1)) {\n      throw(size_mismatch(\"Interpolation vector y must have same length as second dimension of M in interp2d\"));\n    }\n    else if (x.size() < 2 || y.size() < 2) {\n      throw(size_mismatch(\"Interpolation array must have at least two elements in each direction in interp2d\"));\n    }\n    else if (xi.dimensions() != yi.dimensions()) {\n      throw(size_mismatch(\"Indexing arrays must be the same shape in interp2d\"));\n    }\n\n    internal::uint interp_scheme, extrap_policy;\n    internal::extract_interp_extrap(options, interp_scheme, extrap_policy);\n    \n    Index ni = xi.size();\n    ExpressionSize<MDims-1> ans_dims;\n    ans_dims[0] = xi.size();\n    for (int ii = 2; ii < MDims; ++ii) {\n      ans_dims[ii-1] = M.size(ii);\n    }\n\n    Array<MDims-1,MType,MIsActive> ans(ans_dims);\n    \n    // Indices to the first of the two elements in each dimension, and\n    // the weight of the first element\n    IntVector xind0(ni);\n    Vector xweight0(ni);\n    IntVector yind0(ni);\n    Vector yweight0(ni);\n    boolVector is_valid(ni);\n    is_valid = true;\n    internal::InterpHelper<XType>::interp_get_indices_weights(x, xi, interp_scheme, extrap_policy,\n\t\t\t\t\t\t\t      xind0, xweight0, is_valid);\n    internal::InterpHelper<YType>::interp_get_indices_weights(y, yi, interp_scheme, extrap_policy,\n\t\t\t\t\t\t\t      yind0, yweight0, is_valid);\n    /*\n    std::cout << \"xind0 \" << xind0 << \"\\n\";\n    std::cout << \"xweight00 \" << xweight0 << 
\"\\n\";\n    std::cout << \"yind0 \" << yind0 << \"\\n\";\n    std::cout << \"yweight00 \" << yweight0 << \"\\n\";\n    */\n    for (Index ii = 0; ii < ni; ++ii) {\n      if (is_valid(ii)) {\n\t// Bi-linear interpolation\n\tans[ii] = yweight0(ii) * (      xweight0(ii)  * M[xind0(ii)][yind0(ii)]\n\t\t\t\t  +(1.0-xweight0(ii)) * M[xind0(ii)+1][yind0(ii)])\n\t  + (1.0-yweight0(ii)) * (      xweight0(ii)  * M[xind0(ii)][yind0(ii)+1]\n\t\t\t\t  +(1.0-xweight0(ii)) * M[xind0(ii)+1][yind0(ii)+1]);\n      }\n      else {\n\tans[ii] = extrap_value;\n      }\n    }\n    return ans;\n  }\n\n  // Ensure that 2D interpolation works if expressions are provided\n  // for any of the arguments; these are converted to temporary\n  // arrays.\n  template <typename XType, typename YType, typename MType, typename XiType, class YiType,\n\t    class X, class Y, class M, class Xi, class Yi>\n  Array<M::rank-1,MType,M::is_active>\n  interp2d(const Expression<XType,X>& x,\n\t   const Expression<YType,Y>& y,\n\t   const Expression<MType,M>& m,\n\t   const Expression<XiType,Xi>& xi,\n\t   const Expression<YiType,Yi>& yi,\n\t   internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT,\n\t   MType extrap_value = std::numeric_limits<MType>::signaling_NaN()) {\n    const Array<1,XType,false> x2(x.cast());\n    const Array<1,YType,false> y2(y.cast());\n    const Array<M::rank,MType,M::is_active> m2(m.cast());\n    const Array<1,XiType,false> xi2(xi.cast());\n    const Array<1,YiType,false> yi2(yi.cast());\n    return interp2d(x2, y2, m2, xi2, yi2, options, extrap_value);\n  }\n  \n  // 3D interpolation: as 1D interpolation but with two vectors\n  // describing the dimensions of the interpolation array and two\n  // vectors providing points at which interpolated values are\n  // required\n  template <typename XType, typename YType, typename ZType,\n\t    int MDims, typename MType, bool MIsActive,\n\t    typename XiType, typename YiType, typename ZiType>\n  
Array<MDims-2,MType,MIsActive>\n  interp3d(const XType& x,\n\t   const YType& y,\n\t   const ZType& z,\n\t   const Array<MDims,MType,MIsActive>& M,\n\t   const Array<1,XiType,false>& xi,\n\t   const Array<1,YiType,false>& yi,\n\t   const Array<1,ZiType,false>& zi,\n\t   internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT,\n\t   MType extrap_value = std::numeric_limits<MType>::signaling_NaN()) {\n\n    ADEPT_STATIC_ASSERT(MDims >= 3, THREE_DIMENSIONAL_INTERPOLATION_REQUIRES_3D_ARRAY);\n    \n    if (x.size() != M.size(0)) {\n      throw(size_mismatch(\"Interpolation vector x must have same length as first dimension of M in interp3d\"));\n    }\n    if (y.size() != M.size(1)) {\n      throw(size_mismatch(\"Interpolation vector y must have same length as second dimension of M in interp3d\"));\n    }\n    if (z.size() != M.size(2)) {\n      throw(size_mismatch(\"Interpolation vector z must have same length as third dimension of M in interp3d\"));\n    }\n    else if (x.size() < 2 || y.size() < 2 || z.size() < 2) {\n      throw(size_mismatch(\"Interpolation array must have at least two elements in each direction in interp3d\"));\n    }\n    else if (xi.dimensions() != yi.dimensions() || xi.dimensions() != zi.dimensions()) {\n      throw(size_mismatch(\"Indexing arrays must be the same shape in interp3d\"));\n    }\n\n    internal::uint interp_scheme, extrap_policy;\n    internal::extract_interp_extrap(options, interp_scheme, extrap_policy);\n    \n    Index ni = xi.size();\n    ExpressionSize<MDims-2> ans_dims;\n    ans_dims[0] = xi.size();\n    for (int ii = 3; ii < MDims; ++ii) {\n      ans_dims[ii-2] = M.size(ii);\n    }\n\n    Array<MDims-2,MType,MIsActive> ans(ans_dims);\n    \n    // Indices to the first of the two elements in each dimension, and\n    // the weight of the first element\n    IntVector xind0(ni);\n    Vector xweight0(ni);\n    IntVector yind0(ni);\n    Vector yweight0(ni);\n    IntVector zind0(ni);\n    Vector 
zweight0(ni);\n    boolVector is_valid(ni);\n    is_valid = true;\n    internal::InterpHelper<XType>::interp_get_indices_weights(x, xi, interp_scheme, extrap_policy,\n\t\t\t\t\t\t\t      xind0, xweight0, is_valid);\n    internal::InterpHelper<YType>::interp_get_indices_weights(y, yi, interp_scheme, extrap_policy,\n\t\t\t\t\t\t\t      yind0, yweight0, is_valid);\n    internal::InterpHelper<ZType>::interp_get_indices_weights(z, zi, interp_scheme, extrap_policy,\n\t\t\t\t\t\t\t      zind0, zweight0, is_valid);\n    for (Index ii = 0; ii < ni; ++ii) {\n      if (is_valid(ii)) {\n\t// Tri-linear interpolation\n\tans[ii] = xweight0(ii) *\n\t  (yweight0(ii) * (zweight0(ii) * M[xind0(ii)][yind0(ii)][zind0(ii)]\n\t\t\t   +(1.0-zweight0(ii)) * M[xind0(ii)][yind0(ii)][zind0(ii)+1])\n\t   + (1.0-yweight0(ii)) * (zweight0(ii)  * M[xind0(ii)][yind0(ii)+1][zind0(ii)]\n\t\t\t\t   +(1.0-zweight0(ii)) * M[xind0(ii)][yind0(ii)+1][zind0(ii)+1]))\n\t  + (1.0 - xweight0(ii)) *\n\t  (yweight0(ii) * (zweight0(ii) * M[xind0(ii)+1][yind0(ii)][zind0(ii)]\n\t\t\t   +(1.0-zweight0(ii)) * M[xind0(ii)+1][yind0(ii)][zind0(ii)+1])\n\t   + (1.0-yweight0(ii)) * (zweight0(ii)  * M[xind0(ii)+1][yind0(ii)+1][zind0(ii)]\n\t\t\t\t   +(1.0-zweight0(ii)) * M[xind0(ii)+1][yind0(ii)+1][zind0(ii)+1]));\n      }\n      else {\n\tans[ii] = extrap_value;\n      }\n    }\n    return ans;\n  }\n\n  // Ensure that 3D interpolation works if expressions are provided\n  // for any of the arguments; these are converted to temporary\n  // arrays.\n  template <typename XType, typename YType, typename ZType, typename MType,\n\t    typename XiType, class YiType, class ZiType,\n\t    class X, class Y, class Z, class M, class Xi, class Yi, class Zi>\n  Array<M::rank-2,MType,M::is_active>\n  interp3d(const Expression<XType,X>& x,\n\t   const Expression<YType,Y>& y,\n\t   const Expression<ZType,Z>& z,\n\t   const Expression<MType,M>& m,\n\t   const Expression<XiType,Xi>& xi,\n\t   const Expression<YiType,Yi>& yi,\n\t   const 
Expression<ZiType,Zi>& zi,\n\t   internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT,\n\t   MType extrap_value = std::numeric_limits<MType>::signaling_NaN()) {\n    const Array<1,XType,false> x2(x.cast());\n    const Array<1,YType,false> y2(y.cast());\n    const Array<1,ZType,false> z2(z.cast());\n    const Array<M::rank,MType,M::is_active> m2(m.cast());\n    const Array<1,XiType,false> xi2(xi.cast());\n    const Array<1,YiType,false> yi2(yi.cast());\n    const Array<1,ZiType,false> zi2(zi.cast());\n    return interp3d(x2, y2, z2, m2, xi2, yi2, zi2, options, extrap_value);\n  }\n  \n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/inv.h",
    "content": "/* inv.h -- Invert matrices\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                             \n#ifndef AdeptInv_H\n#define AdeptInv_H 1\n\n#include <vector>\n\n#include <adept/Array.h>\n#include <adept/SpecialMatrix.h>\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Invert general square matrix A\n  // -------------------------------------------------------------------\n  template <typename Type>\n  Array<2,Type,false> \n  inv(const Array<2,Type,false>& A);\n\n  // -------------------------------------------------------------------\n  // Invert symmetric matrix A\n  // -------------------------------------------------------------------\n  template <typename Type, SymmMatrixOrientation Orient>\n  SpecialMatrix<Type,internal::SymmEngine<Orient>,false> \n  inv(const SpecialMatrix<Type,internal::SymmEngine<Orient>,false>& A);\n \n  // -------------------------------------------------------------------\n  // Invert arbitrary expression\n  // -------------------------------------------------------------------\n  template <typename Type, class E>\n  typename internal::enable_if<E::rank==2 && E::is_active==false\n\t\t\t       && internal::matrix_op_defined<Type>::value,\n\t\t\t       Array<2,Type,false> >::type\n  inv(const Expression<Type,E>& A) {\n    Array<2,Type,false> array = A.cast();\n    return inv(array);\n  }\n \n}\n\n#endif\n"
  },
  {
    "path": "include/adept/matmul.h",
    "content": "/* matmul.h -- Matrix multiplication capability\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                             \n#ifndef AdeptMatmul_H\n#define AdeptMatmul_H\n\n#include <cmath>\n\n#include <adept/Array.h>\n#include <adept/SpecialMatrix.h>\n#include <adept/cppblas.h>\n\nnamespace adept {\n\n  namespace internal {\n\n    // ---------------------------------------------------------------------\n    // Helper functions for checking dimensions\n    // ---------------------------------------------------------------------\n    template <class L, class R>\n    inline\n    void\n    check_inner_dimensions(const L& left, const R& right) {\n      if (left.empty() || right.empty()) {\n\tthrow empty_array(\"Attempt to perform matrix multiplication with empty array(s)\"\n\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      if (left.dimension(1) != right.dimension(0)) {\n\tthrow inner_dimension_mismatch(\"Inner dimension mismatch in array multiplication\"\n\t\t\t\t       ADEPT_EXCEPTION_LOCATION);\n      }\n    }\n\n    template <class R>\n    inline\n    void\n    check_inner_dimensions_sqr(Index left_dim, const R& right) {\n      if (left_dim == 0 || right.empty()) {\n\tthrow empty_array(\"Attempt to perform matrix multiplication with empty array(s)\"\n\t\t\t  ADEPT_EXCEPTION_LOCATION);\n      }\n      if (left_dim != right.dimension(0)) {\n\tthrow inner_dimension_mismatch(\"Inner dimension mismatch in array multiplication\"\n\t\t\t\t       ADEPT_EXCEPTION_LOCATION);\n      }\n    }\n\n    // ---------------------------------------------------------------------\n    // Underlying functions\n    // ---------------------------------------------------------------------\n\n    // Dense matrix-vector multiplication\n    template <typename T, bool LIsActive, bool RIsActive>\n    inline\n    
Array<1,T,(LIsActive||RIsActive)>\n    matmul_(const Array<2,T,LIsActive>& left, const Array<1,T,RIsActive>& right) {      \n      static const bool is_active = LIsActive || RIsActive;\n\n      check_inner_dimensions(left, right);\n\n      Array<1,T,is_active> ans(left.dimension(0));\n\n      Index stride;\n      BLAS_ORDER order;\n      if (!left.is_row_contiguous() && !left.is_column_contiguous()) {\n\t// Matrix is strided in both directions so needs to be copied\n\t// first\n\tArray<2,T,LIsActive> left_;\n\tleft_ = left;\n\treturn matmul_(left_, right);\n      }\n      else if (left.is_row_contiguous()) {\n\torder = BlasRowMajor;\n\tstride = left.offset(0);\n      }\n      else {\n\torder = BlasColMajor;\n\tstride = left.offset(1);\n      }\n      cppblas_gemv(order, BlasNoTrans, left.dimension(0), left.dimension(1), \n\t\t   1.0, left.const_data(), stride, \n\t\t   right.const_data(), right.offset(0), \n\t\t   0.0, ans.data(), ans.offset(0));\n      if (is_active\n#ifdef ADEPT_RECORDING_PAUSABLE\n\t  && ADEPT_ACTIVE_STACK->is_recording()\n#endif\n\t  ) {\n\n\tuIndex left_index = left.gradient_index();\n\tuIndex right_index = right.gradient_index();\n\tuIndex ans_index = ans.gradient_index();\n\tIndex n = right.dimension(0);\n\tconst ExpressionSize<2>& left_offset = left.offset();\n\tconst ExpressionSize<1>& right_offset = right.offset();\n\tfor (Index i = 0; i < ans.dimension(0); ++i) {\n\t  if (LIsActive) {\n\t    active_stack()->push_derivative_dependence(left_index+i*left_offset[0], \n\t\t\t\t\t\t       right.const_data(), n, left_offset[1], right_offset[0]);\n\t  }\n\t  if (RIsActive) {\n\t    active_stack()->push_derivative_dependence(right_index, \n\t\t\t\t\t\t       left.const_data()+i*left_offset[0], \n\t\t\t\t\t\t       n, right_offset[0], left_offset[1]);\n\t  }\n\t  active_stack()->push_lhs(ans_index + i*ans.offset(0));\n\t}\n      }\n\n      return ans;\n    }\n\n\n    // Dense matrix-matrix multiplication\n    template <typename T, bool LIsActive, 
bool RIsActive>\n    inline\n    Array<2,T,(LIsActive||RIsActive)>\n    matmul_(const Array<2,T,LIsActive>& left, const Array<2,T,RIsActive>& right) {\n      static const bool is_active = LIsActive || RIsActive;\n\n      check_inner_dimensions(left, right);\n\n      if (!left.is_row_contiguous() && !left.is_column_contiguous()) {\n\tArray<2,T,LIsActive> left_;\n\tleft_ = left;\n\tif (!right.is_row_contiguous() && !right.is_column_contiguous()) {\n\t  Array<2,T,RIsActive> right_;\n\t  right_ = right;\n\t  return matmul_(left_, right_);\n\t}\n\telse {\n\t  return matmul_(left_, right);\n\t}\n      }\n      else if (!right.is_row_contiguous() && !right.is_column_contiguous()) {\n\tArray<2,T,RIsActive> right_;\n\tright_ = right;\n\treturn matmul_(left, right_);\n      }\n      else {\n\tIndex left_stride, right_stride, ans_stride;\n\tBLAS_TRANSPOSE left_trans, right_trans;\n\tBLAS_ORDER order;\n\tArray<2,T,is_active> ans(left.dimension(0),right.dimension(1));\n\n\tif (ans.is_row_contiguous()) {\n\t  order = BlasRowMajor;\n\t  ans_stride = ans.offset(0);\n\t}\n\telse {\n\t  order = BlasColMajor;\n\t  ans_stride = ans.offset(1);\n\t}\n\tif (left.is_row_contiguous()) {\n\t  left_trans = order == BlasRowMajor ? BlasNoTrans : BlasTrans;\n\t  left_stride = left.offset(0);\n\t}\n\telse {\n\t  left_trans = order == BlasColMajor ? BlasNoTrans : BlasTrans;\n\t  left_stride = left.offset(1);\n\t}\n\tif (right.is_row_contiguous()) {\n\t  right_trans = order == BlasRowMajor ? BlasNoTrans : BlasTrans;\n\t  right_stride = right.offset(0);\n\t}\n\telse {\n\t  right_trans = order == BlasColMajor ? 
BlasNoTrans : BlasTrans;\n\t  right_stride = right.offset(1);\n\t}\n\tcppblas_gemm(order, left_trans, right_trans,\n\t\t    left.dimension(0), right.dimension(1), left.dimension(1),\n\t\t    1.0, left.const_data(), left_stride,\n\t\t    right.const_data(), right_stride,\n\t\t    0.0, ans.data(), ans_stride);\n\tif ( (LIsActive || RIsActive)\n#ifdef ADEPT_RECORDING_PAUSABLE\n\t    && ADEPT_ACTIVE_STACK->is_recording()\n#endif\n\t    ) {\n\t  uIndex left_index = left.gradient_index();\n\t  uIndex right_index = right.gradient_index();\n\t  uIndex ans_index = ans.gradient_index();\n\t  Index n = right.dimension(0);\n\t  const ExpressionSize<2>& left_offset = left.offset();\n\t  const ExpressionSize<2>& right_offset = right.offset();\n\n\t  for (Index i = 0; i < ans.dimension(0); ++i) {\n\t    for (Index j = 0; j < ans.dimension(1); ++j) {\n\t      if (LIsActive) {\n\t\tactive_stack()->push_derivative_dependence(left_index+i*left_offset[0], \n\t\t\t   right.const_data()+j*right_offset[1], n, \n\t\t\t   left_offset[1], right_offset[0]);\n\t      }\n\t      if (RIsActive) {\n\t\tactive_stack()->push_derivative_dependence(right_index+j*right_offset[1], \n\t\t\t   left.const_data()+i*left_offset[0], n, \n\t\t\t   right_offset[0], left_offset[1]);\n\t      }\n\t      active_stack()->push_lhs(ans_index + i*ans.offset(0) + j*ans.offset(1));\n\t    }\n\t  }\n\n\t}\n\treturn ans;\n      }\n    }\n\n    // Symmetric matrix-vector multiplication\n    template <bool LIsActive, typename T, bool RIsActive>\n    inline\n    Array<1,T,(LIsActive||RIsActive)>\n    matmul_symmetric(const T* left_ptr, SymmMatrixOrientation left_orient, Index left_dim,\n\t\t     Index left_offset, uIndex left_gradient_index,\n\t\t     const Array<1,T,RIsActive>& right) {\n\n      check_inner_dimensions_sqr(left_dim, right);\n\n      if (LIsActive || RIsActive) {\n\tthrow(invalid_operation(\"Cannot yet do matmul(SymmMatrix,Vector) when either are active\"));\n      }\n      BLAS_UPLO uplo;\n      if 
(left_orient == ROW_LOWER_COL_UPPER) {\n\tuplo = BlasLower;\n      }\n      else {\n\tuplo = BlasUpper;\n      }\n      Array<1,T,LIsActive||RIsActive> ans(right.dimension(0));\n      cppblas_symv(BlasRowMajor, uplo, right.dimension(0), \n\t\t   1.0, left_ptr, left_offset, \n\t\t   right.const_data(), right.offset(0), \n\t\t   0.0, ans.data(), ans.offset(0));\n      return ans;\n    }\n\n    // Symmetric matrix-matrix multiplication\n    template <bool LIsActive, typename T, bool RIsActive>\n    inline\n    Array<2,T,(LIsActive||RIsActive)>\n    matmul_symmetric(const T* left_ptr, SymmMatrixOrientation left_orient, Index left_dim,\n\t\t     Index left_offset, uIndex left_gradient_index,\n\t\t     const Array<2,T,RIsActive>& right) {\n\n      check_inner_dimensions_sqr(left_dim, right);\n\n      if (LIsActive || RIsActive) {\n\tthrow(invalid_operation(\"Cannot yet do matmul(SymmMatrix,Matrix) when either are active\"));\n      }\n      if (!right.is_row_contiguous() && !right.is_column_contiguous()) {\n\tArray<2,T,RIsActive> right_;\n\tright_ = right;\n\treturn matmul_symmetric<LIsActive>(left_ptr, left_orient, left_dim, left_offset,\n\t\t\t\t\t   left_gradient_index, right_);\n      }\n      else {\n\tBLAS_ORDER order;\n\tBLAS_UPLO uplo;\n\tIndex right_stride, ans_stride;\n\tArray<2,T,LIsActive||RIsActive> ans;\n\n\tif (right.is_row_contiguous()) {\n\t  order = BlasRowMajor;\n\t  uplo = left_orient == ROW_LOWER_COL_UPPER ? BlasLower : BlasUpper;\n\t  right_stride = right.offset(0);\n\t  ans.resize_row_major(right.dimensions());\n\t  ans_stride = ans.offset(0);\n\t}\n\telse {\n\t  order = BlasColMajor;\n\t  uplo = left_orient == ROW_LOWER_COL_UPPER ? 
BlasUpper : BlasLower;\n\t  right_stride = right.offset(1);\n\t  ans.resize_column_major(right.dimensions());\n\t  ans_stride = ans.offset(1);\n\t}\n\n\tcppblas_symm(order, BlasLeft, uplo,  right.dimension(0), right.dimension(1),\n\t\t     1.0, left_ptr, left_offset, \n\t\t     right.const_data(), right_stride, 0.0,\n\t\t     ans.data(), ans_stride);\n\treturn ans;\n      }\n    }\n\n\n    // Band matrix-vector multiplication\n    template <bool LIsActive, typename T, bool RIsActive>\n    inline\n    Array<1,T,(LIsActive||RIsActive)>\n    matmul_band(const T* left_ptr, MatrixStorageOrder left_order, \n\t\tIndex LDiags, Index UDiags, Index left_dim, Index left_offset,\n\t\tuIndex left_gradient_index, const Array<1,T,RIsActive>& right) {\n      check_inner_dimensions_sqr(left_dim, right);\n\n      if (LIsActive) {\n\tthrow(invalid_operation(\"Cannot yet do matmul(BandMatrix,Vector) for active BandMatrix\"));\n      }\n\n      BLAS_ORDER order;\n      // BLAS declares the start pointer to be in the \"missing data\"\n      // zone, so we need to subtract from the address of the top-left\n      // corner of the matrix\n      const T* left_start;\n      if (left_order == ROW_MAJOR) {\n\torder = BlasRowMajor;\n\tleft_start = left_ptr-UDiags;\n      }\n      else {\n\torder = BlasColMajor;\n\tleft_start = left_ptr-LDiags;\n      }\n      Array<1,T,(LIsActive||RIsActive)> ans(right.dimension(0));\n      cppblas_gbmv(order, BlasNoTrans, left_dim, left_dim, LDiags, UDiags,\n\t\t   1.0, left_start, left_offset+1,\n\t\t   right.const_data(), right.offset(0), \n\t\t   0.0, ans.data(), ans.offset(0));\n      if (RIsActive) {\n\tuIndex right_index = right.gradient_index();\n\tuIndex ans_index = ans.gradient_index();\n\n\tif (left_order == ROW_MAJOR) {\n\t  for (Index i = 0; i < ans.dimension(0); ++i) {\n\t    // Using info from BandEngine<ROW_MAJOR>::get_row_range in\n\t    // SpecialMatrix.h\n\t    Index j_start = i<LDiags ? 
0 : i-LDiags;\n\t    Index j_end_plus_1 = i+UDiags+1>left_dim ? left_dim : i+UDiags+1;\n\t    Index n = j_end_plus_1 - j_start;\n\t    Index index_start = i*left_offset + j_start;\n\t    Index index_stride = 1;\n\t    active_stack()->push_derivative_dependence(right_index + j_start, \n\t\t\t\t\t\t       left_ptr+index_start,\n\t\t\t\t\t\t       n, right.offset(0), index_stride);\n\t    active_stack()->push_lhs(ans_index + i*ans.offset(0));\n\t  }\n\t}\n\telse {\n\t  for (Index i = 0; i < ans.dimension(0); ++i) {\n\t    // Using info from BandEngine<COL_MAJOR>::get_row_range in\n\t    // SpecialMatrix.h\n\t    Index j_start = i<LDiags ? 0 : i-LDiags;\n\t    Index j_end_plus_1 = i+UDiags+1>left_dim ? left_dim : i+UDiags+1;\n\t    Index n = j_end_plus_1 - j_start;\n\t    Index index_start = i + j_start*left_offset;\n\t    Index index_stride = left_offset;\n\t    active_stack()->push_derivative_dependence(right_index + j_start, \n\t\t\t\t\t\t       left_ptr+index_start,\n\t\t\t\t\t\t       n, right.offset(0), index_stride);\n\t    active_stack()->push_lhs(ans_index + i*ans.offset(0));\n\t  }\n\t}\n      }\n      return ans;\n    }\n\n\n    // Matrix-matrix multiplication with a band matrix on the left,\n    // achieved by repeated matrix-vector multiplications\n    template <bool LIsActive, typename T, bool RIsActive>\n    inline\n    Array<2,T,(LIsActive||RIsActive)>\n    matmul_band(const T* left_ptr, MatrixStorageOrder left_order, \n\t\tIndex LDiags, Index UDiags, Index left_dim, Index left_offset,\n\t\tuIndex left_gradient_index, const Array<2,T,RIsActive>& right) {\n      check_inner_dimensions_sqr(left_dim, right);\n      if (LIsActive || RIsActive) {\n\tthrow(invalid_operation(\"Cannot yet do matmul(BandMatrix,Matrix) when either are active\"));\n      }\n      BLAS_ORDER order;\n      // BLAS declares the start pointer to be in the \"missing data\"\n      // zone, so we need to subtract from the address of the top-left\n      // corner of the matrix\n      
const T* left_start;\n      if (left_order == ROW_MAJOR) {\n\torder = BlasRowMajor;\n\tleft_start = left_ptr-UDiags;\n      }\n      else {\n\torder = BlasColMajor;\n\tleft_start = left_ptr-LDiags;\n      }\n      Array<2,T,(LIsActive||RIsActive)> ans(right.dimension(0),right.dimension(1));\n      for (Index i = 0; i < right.dimension(1); ++i) {\n\tcppblas_gbmv(order, BlasNoTrans, left_dim, left_dim, LDiags, UDiags,\n\t\t     1.0, left_start, left_offset+1,\n\t\t     right.const_data()+i*right.offset(1), right.offset(0), \n\t\t     0.0, ans.data()+i*ans.offset(1), ans.offset(0));\n      }\n      return ans;\n    }\n    \n\n    // ---------------------------------------------------------------------\n    // Versions of matmul_ implemented in terms of the underlying functions\n    // ---------------------------------------------------------------------\n\n    // Dense vector-matrix multiplication is evaluated by swapping and\n    // transposing the arguments\n    template <typename T, bool LIsActive, bool RIsActive>\n    inline\n    Array<1,T,(LIsActive||RIsActive)>\n    matmul_(const Array<1,T,LIsActive>& left,\n\t    const Array<2,T,RIsActive>& right) {\n      return matmul_(right.T(), left);\n    }\n\n    // Symmetric matrix-vector and matrix-matrix multiplication\n    template <typename T, SymmMatrixOrientation LOrient, bool LIsActive, bool RIsActive, int RRank>\n    inline\n    Array<RRank,T,(LIsActive||RIsActive)>\n    matmul_(const SpecialMatrix<T,internal::SymmEngine<LOrient>,LIsActive>& left,\n\t    const Array<RRank,T,RIsActive>& right) {\n      return matmul_symmetric<LIsActive>(left.const_data(), LOrient, left.dimension(0),\n\t\t\t\t\t left.offset(), left.gradient_index(), right);\n    }\n\n    // Vector multiplied by symmetric matrix: swap and transpose the arguments\n    template <typename T, bool LIsActive, SymmMatrixOrientation ROrient, bool RIsActive>\n    inline\n    Array<1,T,(LIsActive||RIsActive)>\n    matmul_(const Array<1,T,LIsActive>& 
left,\n\t    const SpecialMatrix<T,internal::SymmEngine<ROrient>,RIsActive>& right) {\n      return matmul_symmetric<RIsActive>(right.const_data(), ROrient, \n\t\t\t\t\t right.dimension(0), right.offset(),\n\t\t\t\t\t right.gradient_index(), left);\n    }\n\n    // Dense matrix multiplied by symmetric matrix: swap and transpose\n    // the arguments, then transpose the result\n    template <typename T, bool LIsActive, SymmMatrixOrientation ROrient, bool RIsActive>\n    inline\n    Array<2,T,(LIsActive||RIsActive)>\n    matmul_(const Array<2,T,LIsActive>& left,\n\t    const SpecialMatrix<T,internal::SymmEngine<ROrient>,RIsActive>& right) {\n      return matmul_symmetric<RIsActive>(right.const_data(), ROrient,\n\t\t\t\t\t right.dimension(0), right.offset(),\n\t\t\t\t\t right.gradient_index(), left.T()).T();\n    }\n\n    // Band matrix-vector and matrix-matrix multiplication\n    template <typename T, MatrixStorageOrder LOrder, Index LDiags, Index UDiags, \n\t      bool LIsActive, bool RIsActive, int RRank>\n    inline\n    Array<RRank,T,(LIsActive||RIsActive)>\n    matmul_(const SpecialMatrix<T,internal::BandEngine<LOrder,LDiags,UDiags>,LIsActive>& left,\n\t    const Array<RRank,T,RIsActive>& right) {\n      return matmul_band<LIsActive>(left.const_data(), LOrder, LDiags, UDiags,\n\t\t\t\t    left.dimension(0), left.offset(), left.gradient_index(), right);\n    }\n\n    // Vector multiplied by band matrix: swap and transpose the arguments\n    template <typename T, bool LIsActive, MatrixStorageOrder ROrder, Index LDiags, Index UDiags,\n\t      bool RIsActive>\n    inline\n    Array<1,T,(LIsActive||RIsActive)>\n    matmul_(const Array<1,T,LIsActive>& left,\n\t    const SpecialMatrix<T,internal::BandEngine<ROrder,LDiags,UDiags>,RIsActive>& right) {\n      static const MatrixStorageOrder new_r_order = ROrder == ROW_MAJOR ? 
COL_MAJOR : ROW_MAJOR;\n      return matmul_band<RIsActive>(right.const_data(), new_r_order, UDiags, LDiags,\n\t\t\t\t    right.dimension(0), right.offset(), right.gradient_index(), left);\n    }\n\n    // Dense matrix multiplied by band matrix: swap and transpose the\n    // arguments, then transpose the result\n    template <typename T, bool LIsActive, MatrixStorageOrder ROrder, Index LDiags, Index UDiags,\n\t      bool RIsActive>\n    inline\n    Array<2,T,(LIsActive||RIsActive)>\n    matmul_(const Array<2,T,LIsActive>& left,\n\t    const SpecialMatrix<T,internal::BandEngine<ROrder,LDiags,UDiags>,RIsActive>& right) {\n      static const MatrixStorageOrder new_r_order = ROrder == ROW_MAJOR ? COL_MAJOR : ROW_MAJOR;\n      return matmul_band<RIsActive>(right.const_data(), new_r_order, UDiags, LDiags,\n\t\t\t\t    right.dimension(0), right.offset(), right.gradient_index(), left.T()).T();\n    }\n\n\n    // ---------------------------------------------------------------------\n    // promote_array: helper function to change type of array and\n    // convert expressions to arrays\n    // ---------------------------------------------------------------------\n\n    // If the argument is not an l-value then convert it to a dense\n    // array of the same rank\n    template <typename NewType, typename OldType, class A>\n    inline\n    typename internal::enable_if<!A::is_lvalue,Array<A::rank,NewType,A::is_active> >::type\n    promote_array(const Expression<OldType,A>& arg) {\n      return Array<A::rank,NewType,A::is_active>(arg);\n    }\n\n    // If the argument is a dense array then convert it to the new\n    // type; this will only involve a copy of the raw data if the type\n    // is changed, otherwise the new array will simply link to the old\n    // one\n    template <typename NewType, int Rank, typename OldType, bool IsActive>\n    inline\n    Array<Rank,NewType,IsActive>\n    promote_array(const Array<Rank,OldType,IsActive>& arg) {\n      return 
Array<Rank,NewType,IsActive>(const_cast<Array<Rank,OldType,IsActive>&>(arg));\n    }\n\n#ifdef ADEPT_ONLY_DIFFERENTIATE_DENSE_MATRIX_MULTIPLICATION\n    // If the argument is an active special matrix then it must be\n    // copied to a dense \"Array\" because differentiation of the\n    // various types of special matrix (symmetric, band, upper, lower\n    // etc) is not yet implemented.\n    template <typename NewType, typename OldType, class Engine>\n    inline\n    Array<2,NewType,true>\n    promote_array(const SpecialMatrix<OldType,Engine,true>& arg) {\n      return Array<2,NewType,true>(\n\t   const_cast<SpecialMatrix<OldType,Engine,true>&>(arg));\n    }\n\n    // If the argument is an inactive symmetric or band matrix then\n    // convert the element type; this will only involve a copy of the\n    // raw data if the type is changed, otherwise the new array will\n    // simply link to the old\n    template <typename NewType, typename OldType, SymmMatrixOrientation Orient>\n    inline\n    SpecialMatrix<NewType,internal::SymmEngine<Orient>,false>\n    promote_array(const SpecialMatrix<OldType,internal::SymmEngine<Orient>,false>& arg) {\n      return SpecialMatrix<NewType,internal::SymmEngine<Orient>,false>(\n\t const_cast<SpecialMatrix<OldType,internal::SymmEngine<Orient>,false>&>(arg));\n    }\n    template <typename NewType, typename OldType, \n      MatrixStorageOrder Order, Index LDiags, Index UDiags>\n    inline\n    SpecialMatrix<NewType,internal::BandEngine<Order,LDiags,UDiags>,false>\n    promote_array(const SpecialMatrix<OldType,internal::BandEngine<Order,LDiags,UDiags>,false>& arg) {\n      return SpecialMatrix<NewType,internal::BandEngine<Order,LDiags,UDiags>,false>(\n\t const_cast<SpecialMatrix<OldType,internal::BandEngine<Order,LDiags,UDiags>,false>&>(arg));\n    } \n\n    // For other special matrices (square and triangular), specific\n    // matrix multiplication functions have not yet been added, so we\n    // have to convert to a dense array 
first\n    template <typename NewType, typename OldType, class Engine>\n    inline\n    Array<2,NewType,false>\n    promote_array(const SpecialMatrix<OldType,Engine,false>& arg) {\n      return Array<2,NewType,false>(\n\t const_cast<SpecialMatrix<OldType,Engine,false>&>(arg));\n    } \n\n#else\n    // The following assumes that the Adept library knows how to\n    // differentiate special matrices: currently it doesn't so this\n    // path is likely to throw a run-time exception.\n    template <typename NewType, typename OldType, class Engine, bool IsActive>\n    inline\n    SpecialMatrix<NewType,Engine,IsActive>\n    promote_array(const SpecialMatrix<OldType,Engine,IsActive>& arg) {\n      return SpecialMatrix<NewType,Engine,IsActive>(\n\t\t     const_cast<SpecialMatrix<OldType,Engine,IsActive>&>(arg));\n    }\n#endif\n\n    // If the argument is a fixed array of a different type then copy it\n    template <typename NewType, typename OldType, bool IsActive, Index J0,\n\t      Index J1, Index J2, Index J3, Index J4, Index J5, Index J6>\n    inline\n    typename enable_if<!is_same<NewType,OldType>::value,\n\t\t       Array<fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank,\n\t\t\t     NewType,IsActive> >::type\n    promote_array(const FixedArray<OldType,IsActive,J0,J1,J2,J3,J4,J5,J6>& arg) {\n      return Array<fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank, \n\tNewType,IsActive>(const_cast<FixedArray<OldType,IsActive,J0,J1,J2,J3,J4,J5,J6>&>(arg));\n    }\n\n    // If the argument is a fixed array of the same type then link to it\n    template <typename NewType, typename OldType, bool IsActive, Index J0, \n\t      Index J1, Index J2, Index J3, Index J4, Index J5, Index J6>\n    inline\n    typename enable_if<is_same<NewType,OldType>::value,\n\t\t       Array<fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank,\n\t\t\t     NewType,IsActive> >::type\n    promote_array(const FixedArray<OldType,IsActive,J0,J1,J2,J3,J4,J5,J6>& arg) {\n      return 
Array<fixed_array<J0,J1,J2,J3,J4,J5,J6>::rank,NewType,IsActive>\n\t(const_cast<FixedArray<OldType,IsActive,J0,J1,J2,J3,J4,J5,J6>&>(arg).data(), 0,\n\t arg.dimensions(), arg.offset(), arg.gradient_index());\n    }\n\n  } // End namespace internal\n\n  // ---------------------------------------------------------------------\n  // matmul function: replicates Fortran-90 equivalent\n  // ---------------------------------------------------------------------\n\n  // If either argument is not an lvalue (i.e. is an array expression\n  // rather than an array) then convert it into a dense array\n  template <typename LType, class L, typename RType, class R>\n  inline\n  typename internal::enable_if<(L::rank == 1 || L::rank == 2) && (R::rank == 1 || R::rank == 2)\n                      && (L::rank+R::rank > 2),\n    Array<L::rank+R::rank-2,typename internal::promote<LType,RType>::type,\n    L::is_active||R::is_active> >::type\n  matmul(const Expression<LType,L>& left, const Expression<RType,R>& right) {\n    typedef typename internal::promote<typename L::type,typename R::type>::type type;\n    return internal::matmul_(internal::promote_array<type>(left.cast()),\n\t\t\t     internal::promote_array<type>(right.cast()));\n  }\n  \n\n  // ---------------------------------------------------------------------\n  // Implement \"**\" pseudo-operator for matrix multiplication\n  // ---------------------------------------------------------------------\n\n  // In order for A**B to lead to matrix multiplication, *B will\n  // return a MatmulRHS object, and A*[a MatmulRHS object] will send\n  // the two arguments to the matmul function\n\n  namespace internal {\n\n    // The MatmulRHS class simply contains a reference to an array\n    template <class A>\n    struct MatmulRHS {\n      // The following are not used but enable\n      // expr_cast<MatmulRHS>::... 
to work\n      static const int  rank      = A::rank;\n      static const bool is_active = A::is_active;\n      static const int  n_arrays  = 0;\n      static const bool n_active  = 0;\n      static const bool is_lvalue = false;\n      static const bool is_vectorizable = false;\n      static const int  n_scratch = 0;\n      // The following are necessary in order that other binary\n      // operator* functions can compile, even if they are rejected\n      // for a particular multiplication\n      typedef typename A::type type;\n      typedef bool _adept_expression_flag;\n      // Constructor simply saves a reference to the expression\n      // argument\n      MatmulRHS(const A& a) : array(a) { }\n      const A& array;\n    };\n  }\n\n  // Dereference operator returns a MatmulRHS object\n  template <typename Type, class A>\n  inline\n  typename internal::enable_if<(A::rank == 1 || A::rank == 2),\n\t\t\t       internal::MatmulRHS<A> >::type\n  operator*(const Expression<Type,A>& a) {\n    return internal::MatmulRHS<A>(a.cast());\n  }\n\n  // Multiply operator with a MatmulRHS object on the right-hand-side\n  // will call the matmul function\n  template <typename LType, class L, class R>\n  inline\n  Array<L::rank+R::rank-2,typename internal::promote<LType,typename R::type>::type,\n\t(L::is_active||R::is_active)>\n  operator*(const Expression<LType,L>& left, const internal::MatmulRHS<R>& right) {\n    return matmul(left.cast(),right.array.cast());\n  }\n\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/noalias.h",
    "content": "/* noalias.h -- Wrap an expression so that alias checking is not performed\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptNoalias_H\n#define AdeptNoalias_H\n\n#include <adept/Expression.h>\n\nnamespace adept {\n\n  namespace internal {\n\n    // No-alias wrapper for enabling noalias()\n    template <typename Type, class R>\n    struct NoAlias\n      : public Expression<Type, NoAlias<Type, R> > \n    {\n      static const int  rank       = R::rank;\n      static const bool is_active  = R::is_active;\n      static const int  n_active   = R::n_active;\n      static const int  n_scratch  = R::n_scratch;\n      static const int  n_arrays   = R::n_arrays;\n      static const bool is_vectorizable = R::is_vectorizable;\n\n      const R& arg;\n\n      NoAlias(const Expression<Type, R>& arg_)\n\t: arg(arg_.cast()) { }\n      \n      template <int Rank>\n\tbool get_dimensions_(ExpressionSize<Rank>& dim) const {\n\treturn arg.get_dimensions(dim);\n      }\n\n//       Index get_dimension_with_len(Index len) const {\n// \treturn arg.get_dimension_with_len_(len);\n//       }\n\n      std::string expression_string_() const {\n\tstd::string str = \"noalias(\";\n\tstr += static_cast<const R*>(&arg)->expression_string() + \")\";\n\treturn str;\n      }\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn false;\n      }\n      bool all_arrays_contiguous_() const {\n\treturn arg.all_arrays_contiguous_(); \n      }\n \n      bool is_aligned_() const {\n\treturn arg.is_aligned_();\n      } \n     \n      template <int n>\n      int alignment_offset_() const { \n        return arg.template alignment_offset_<n>();\n      }\n\n      template <int Rank>\n      Type value_with_len_(Index i, Index len) const {\n\treturn operation(arg.value_with_len(i, len));\n      }\n      \n      template <int MyArrayNum, 
int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\targ.template advance_location_<MyArrayNum>(loc);\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn arg.template value_at_location_<MyArrayNum>(loc);\n      }\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn arg.template packet_at_location_<MyArrayNum>(loc);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn arg.template value_at_location_store_<MyArrayNum,MyScratchNum>(loc, \n\t\t\t\t\t\t\t\t     scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\targ.template calc_gradient_<MyArrayNum, MyScratchNum>(stack, loc, \n\t\t\t\t\t\t\t      scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch,\n\t\ttypename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n\targ.template calc_gradient_<MyArrayNum, MyScratchNum+1>(stack, loc, \n\t\t\t\t\t\t\t\tscratch,\n\t\t\t\t\t\t\t\tmultiplier);\n      }\n\n      template <int MyArrayNum, int Rank, int NArrays>\n      void set_location_(const ExpressionSize<Rank>& i, \n\t\t\t ExpressionSize<NArrays>& 
index) const {\n\targ.template set_location_<MyArrayNum>(i, index);\n      }\n\n    }; // End struct NoAlias\n\n  }\n\n  template <typename Type, class R>\n  inline\n  adept::internal::NoAlias<Type, R>\n  noalias(const Expression<Type, R>& r) {\n    return adept::internal::NoAlias<Type, R>(r.cast());\n  }\n\n  template <typename Type>\n  inline\n  typename internal::enable_if<internal::is_not_expression<Type>::value, Type>::type\n  noalias(const Type& r) {\n    return r;\n  }\n\n}\n\n#endif\n"
  },
  {
    "path": "include/adept/outer_product.h",
    "content": "/* outer_product.h -- Compute the outer product of two vectors\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                             \n#ifndef AdeptOuterProduct_H\n#define AdeptOuterProduct_H\n\n#include <adept/BinaryOperation.h>\n#include <adept/Array.h>\n\nnamespace adept {\n\n  namespace internal {\n\n    // Expression representing an outer product\n    template <typename Type, typename LType, class L, typename RType, class R>\n    class OuterProduct\n      : public Expression<Type, OuterProduct<Type,LType,L,RType,R> > {\n\n      typedef Array<1,LType,L::is_active> LArray;\n      typedef Array<1,RType,R::is_active> RArray;\n\n    public:\n      // Static data\n      static const int rank  = 2;\n      static const bool is_active  = L::is_active || R::is_active;\n      static const int  store_result = is_active;\n      static const int  n_active  = LArray::n_active + RArray::n_active;\n      static const int  n_local_scratch = store_result; \n      static const int  n_scratch \n        = n_local_scratch + LArray::n_scratch + RArray::n_scratch;\n      static const int  n_arrays  = LArray::n_arrays + RArray::n_arrays;\n      // Currently not vectorizable because the current design always\n      // has the array index increasing\n      //      static const bool is_vectorizable = is_same<LType,RType>::value;\n      static const bool is_vectorizable = false;//is_same<LType,RType>::value;\n\n    protected:\n\n      // DATA: need to store actual arrays to avoid temporaries going\n      // out of scope before they're used; note that if an array is\n      // passed in then a shallow copy is made.\n      const LArray left;\n      const RArray right;\n \n    public:\n\n      OuterProduct(const Expression<LType,L>& left_,\n\t\t   const Expression<RType,R>& right_) \n\t: left(left_.cast()), right(right_.cast()) { }\n\n      
bool get_dimensions_(ExpressionSize<2>& dim) const {\n\tdim[0] = left.size();\n\tdim[1] = right.size();\n\n\treturn dim[0] > 0 && dim[1] > 0;\n      }\n\n      std::string expression_string_() const {\n\treturn \"outer_product(\" + left.expression_string() + \",\"\n\t  + right.expression_string() + \")\";\n      }\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn false;\n      }\n\n      bool all_arrays_contiguous_() const {\n\treturn right.all_arrays_contiguous_();\n      }\n \n      bool is_aligned_() const {\n\treturn right.is_aligned_();\n      }\n      \n      template <int n>\n      int alignment_offset_() const {\n\treturn right.template alignment_offset_<n>();\n      }\n\n      // Do not implement value_with_len_\n\n      // Advance the row only, so the left vector is not advanced\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\tright.template advance_location_<MyArrayNum+LArray::n_arrays>(loc);\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn left.template value_at_location_<MyArrayNum>(loc)\n  \t    * right.template value_at_location_<MyArrayNum+LArray::n_arrays>(loc);\n      }\n\n      // This does not work because the array index is always\n      // increased which it shouldn't be for the left vector. 
For this\n      // reason, vectorization is turned off (see is_vectorizable\n      // above)\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\t// The LHS of the following multiplication returns a packet\n\t// containing repeated values of the left vector at one\n\t// location\n\treturn Packet<Type>(left.template value_at_location_<MyArrayNum>(loc)) // <- fix!\n\t  * right.template packet_at_location_<MyArrayNum+LArray::n_arrays>(loc);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum] = \n\t  left.template value_at_location_store_<MyArrayNum,MyScratchNum+n_local_scratch>(loc, scratch)\n\t  * right.template value_at_location_store_<MyArrayNum+LArray::n_arrays,\n\t\t\t\t\t   MyScratchNum+LArray::n_scratch+n_local_scratch>(loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn scratch[MyScratchNum];\n      }\n      \n      template <int MyArrayNum, int NArrays>\n      void set_location_(const ExpressionSize<2>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\tleft.template  set_location_<MyArrayNum>(ExpressionSize<1>(i[0]), index);\n\tright.template set_location_<MyArrayNum+LArray::n_arrays>(ExpressionSize<1>(i[1]), index);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n        calc_left_ <MyArrayNum, MyScratchNum>(stack, left,  loc, scratch);\n        calc_right_<MyArrayNum, MyScratchNum>(stack, right, loc, scratch);\n      }\n\n      // 
As the previous but multiplying the gradient by \"multiplier\"\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, typename MyType>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n        calc_left_ <MyArrayNum, MyScratchNum>(stack, left,  loc, scratch, multiplier);\n        calc_right_<MyArrayNum, MyScratchNum>(stack, right, loc, scratch, multiplier);\n      }\n\n    protected:\n      // Only calculate gradients for left and right arguments if they\n      // are active; otherwise do nothing\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyLType>\n      typename enable_if<MyLType::is_active,void>::type\n      calc_left_(Stack& stack, const MyLType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tMultiply::template calc_left<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyLType>\n      typename enable_if<!MyLType::is_active,void>::type\n      calc_left_(Stack& stack, const MyLType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyRType>\n      typename enable_if<MyRType::is_active,void>::type\n      calc_right_(Stack& stack, const MyRType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tMultiply::template calc_right<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyRType>\n      typename enable_if<!MyRType::is_active,void>::type\n      calc_right_(Stack& stack, const MyRType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const 
ScratchVector<NScratch>& scratch) const { }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyLType, typename MyType>\n      typename enable_if<MyLType::is_active,void>::type\n      calc_left_(Stack& stack, const MyLType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tMultiply::template calc_left<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch, multiplier);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyLType, typename MyType>\n      typename enable_if<!MyLType::is_active,void>::type\n      calc_left_(Stack& stack, const MyLType& left, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const { }\n\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyRType, typename MyType>\n      typename enable_if<MyRType::is_active,void>::type\n      calc_right_(Stack& stack, const MyRType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const {\n\tMultiply::template calc_right<MyArrayNum, MyScratchNum>(stack, left, right, loc, scratch, multiplier);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch, class MyRType, typename MyType>\n      typename enable_if<!MyRType::is_active,void>::type\n      calc_right_(Stack& stack, const MyRType& right, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch, MyType multiplier) const { }\n    };\n   \n  }\n\n  // Define outer_product function\n  template <typename LType, class L, typename RType, class R>\n  internal::OuterProduct<typename internal::promote<LType,RType>::type,LType,L,RType,R>\n  outer_product(const Expression<LType,L>& l, const Expression<RType,R>& r) {\n    return internal::OuterProduct<typename internal::promote<LType,RType>::type,\n\t\t\t\t 
 LType,L,RType,R>(l,r);\n  }\n\n}\n\n\n#endif\n"
  },
  {
    "path": "include/adept/quick_e.h",
    "content": "/* quick_e.h -- Fast exponential function for Intel and ARM intrinsics\n\n   Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n   Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n   This file is part of the Adept library, although can be used\n   stand-alone.\n\n   The exponential function for real arguments is used in many areas\n   of physics, yet is not vectorized by many compilers.  This C++\n   header file provides a fast exponential function (quick_e::exp) for\n   single and double precision floating point numbers, Intel\n   intrinsics representing packets of 2, 4, 8 and 16 such numbers, and\n   ARM NEON intrinsics representing 2 doubles or 4 floats.  The\n   algorithm has been taken from Agner Fog's Vector Class Library. It\n   is designed to be used in other libraries that make use of Intel or\n   ARM intrinsics.  Since such libraries often define their own\n   classes for representing vectors of numbers, this file does not\n   define any such classes itself.\n\n   Also in the namespace quick_e, this file defines the following\n   inline functions that work on intrinsics of type \"Vec\" and the\n   corresponding scalar type \"Sca\":\n\n     Vec add(Vec x, Vec y)   Add the elements of x and y\n     Vec sub(Vec x, Vec y)   Subtract the elements of x and y\n     Vec mul(Vec x, Vec y)   Multiply the elements of x and y\n     Vec div(Vec x, Vec y)   Divide the elements of x and y\n     Vec set0<Vec>()         Returns zero in all elements\n     Vec set1<Vec>(Sca a)    Returns all elements set to a\n     Vec sqrt(Vec x)         Square root of all elements\n     Vec fmin(Vec x, Vec y)  Minimum of elements of x and y\n     Vec fmax(Vec x, Vec y)  Maximum of elements of x and y\n     Vec load(const Sca* d)  Aligned load from memory location d\n     Vec loadu(const Sca* d) Unaligned load from memory location d\n     void store(Sca* d, Vec x)  Aligned store of x to d\n     void storeu(Sca* d, Vec x) Unaligned store of x to d\n     Sca 
hsum(Vec x)         Horizontal sum of elements of x\n     Sca hmul(Vec x)         Horizontal product of elements of x\n     Sca hmin(Vec x)         Horizontal minimum of elements of x\n     Sca hmax(Vec x)         Horizontal maximum of elements of x\n     Vec fma(Vec x, Vec y, Vec z)  Fused multiply-add: (x*y)+z\n     Vec fnma(Vec x, Vec y, Vec z) Returns z-(x*y)\n     Vec pow2n(Vec x)        Returns 2 to the power of x\n     Vec exp(Vec x)          Returns exponential of x\n   \n */\n\n#ifndef QuickE_H\n#define QuickE_H 1\n\n#include <cmath>\n\n// Microsoft compiler doesn't define __SSE2__ even if __AVX__ is\n// defined\n#ifdef __AVX__\n#ifndef __SSE2__\n#define __SSE2__ 1\n#endif\n#endif\n\n// Headers needed for x86 vector intrinsics\n#ifdef __SSE2__\n  #include <xmmintrin.h> // SSE\n  #include <emmintrin.h> // SSE2\n  // Numerous platforms don't define _mm_undefined_ps in xmmintrin.h,\n  // so we assume none do, except GCC >= 4.9.1 and CLANG >= 3.8.0.\n  // Those that don't use an equivalent function that sets the\n  // elements to zero.\n  #define QE_MM_UNDEFINED_PS _mm_setzero_ps\n  #ifdef __clang__\n    #if __has_builtin(__builtin_ia32_undef128)\n      #undef QE_MM_UNDEFINED_PS\n      #define QE_MM_UNDEFINED_PS _mm_undefined_ps\n    #endif\n  #elif defined(__GNUC__)\n    #define GCC_VERSION (__GNUC__ * 10000 \\\n\t\t\t + __GNUC_MINOR__ * 100\t\\\n\t\t\t + __GNUC_PATCHLEVEL__)\n    #if GCC_VERSION >= 40901\n      #undef QE_MM_UNDEFINED_PS\n      #define QE_MM_UNDEFINED_PS _mm_undefined_ps\n    #endif\n    #undef GCC_VERSION\n  #endif // __clang__/__GNUC__\n#endif // __SSE2__\n\n#ifdef __SSE4_1__\n#include <smmintrin.h>\n#endif\n\n#ifdef __AVX__\n  #include <tmmintrin.h> // SSE3\n  #include <immintrin.h> // AVX\n#endif\n\n#ifdef __AVX512F__\n  #include <immintrin.h>\n#endif\n\n#ifdef __ARM_NEON\n  // We only have sufficient floating-point intrinsics to vectorize on\n  // 64-bit ARM targets\n  #if defined(__aarch64__) || defined(_M_ARM64)\n    #define 
QE_HAVE_ARM64_NEON 1\n    #include \"arm_neon.h\"\n  #endif\n#endif\n\nnamespace quick_e {\n\n  // -------------------------------------------------------------------\n  // Traits\n  // -------------------------------------------------------------------\n\n  template <typename Type, int Size> struct packet {\n    static const bool is_available = false;\n    static const int  size         = 1;\n    typedef Type type;\n  };\n  template <typename Type> struct longest_packet {\n    typedef Type type;\n    static const int size = 1;\n  };\n\n  // g++ issues ugly warnings if VEC is an Intel intrinsic, disabled\n  // with -Wno-ignored-attributes\n#define QE_DEFINE_TRAITS(TYPE, SIZE, VEC, HALF_TYPE)   \\\n  template <> struct packet<TYPE,SIZE> {\t       \\\n    static const bool is_available = true;\t       \\\n    static const int  size = SIZE;\t\t       \\\n    typedef VEC type;\t\t\t\t       \\\n    typedef HALF_TYPE half_type;\t\t       \\\n  };\n\n#define QE_DEFINE_LONGEST(VECS, VECD)\t\t\t\\\n  template <> struct longest_packet<float> {\t\t\\\n    typedef VECS type;\t\t\t\t\t\\\n    static const int size = sizeof(VECS)/sizeof(float);\t\\\n  };\t\t\t\t\t\t\t\\\n  template <> struct longest_packet<double> {\t\t\\\n    typedef VECD type;\t\t\t\t\t\\\n    static const int size = sizeof(VECD)/sizeof(double);\\\n  };\n  \n#ifdef __SSE2__\n  #define QE_HAVE_FAST_EXP 1\n  QE_DEFINE_TRAITS(float, 4, __m128, __m128)\n  QE_DEFINE_TRAITS(double, 2, __m128d, double)\n  #ifdef __AVX__\n    QE_DEFINE_TRAITS(float, 8, __m256, __m128)\n    QE_DEFINE_TRAITS(double, 4, __m256d, __m128d)\n    #ifdef __AVX512F__\n      QE_DEFINE_TRAITS(float, 16, __m512, __m256)\n      QE_DEFINE_TRAITS(double, 8, __m512d, __m256d)\n      QE_DEFINE_LONGEST(__m512, __m512d)\n      #define QE_LONGEST_FLOAT_PACKET 16\n      #define QE_LONGEST_DOUBLE_PACKET 8\n    #else\n      QE_DEFINE_LONGEST(__m256, __m256d)\n      #define QE_LONGEST_FLOAT_PACKET 8\n      #define QE_LONGEST_DOUBLE_PACKET 4\n    #endif\n  
#else\n    QE_DEFINE_LONGEST(__m128, __m128d)\n    #define QE_LONGEST_FLOAT_PACKET 4\n    #define QE_LONGEST_DOUBLE_PACKET 2\n  #endif\n  // If QE_AVAILABLE is defined then we can use the fast exponential\n  #define QE_AVAILABLE\n#elif defined(QE_HAVE_ARM64_NEON)\n  #define QE_HAVE_FAST_EXP 1\n  QE_DEFINE_TRAITS(float, 4, float32x4_t, float32x4_t)\n  QE_DEFINE_TRAITS(double, 2, float64x2_t, double)\n  QE_DEFINE_LONGEST(float32x4_t, float64x2_t)\n  #define QE_LONGEST_FLOAT_PACKET 4\n  #define QE_LONGEST_DOUBLE_PACKET 2\n#else\n  // No vectorization available: longest packet is of size 1\n  QE_DEFINE_LONGEST(float, double);\n#define QE_LONGEST_FLOAT_PACKET 1\n#define QE_LONGEST_DOUBLE_PACKET 1\n#endif\n  \n  \n  // -------------------------------------------------------------------\n  // Scalars\n  // -------------------------------------------------------------------\n  \n  // Define a few functions for scalars in order that the same\n  // implementation of \"exp\" can be used for both scalars and SIMD\n  // vectors\n  template <typename T> T add(T x, T y) { return x+y; }\n  template <typename T> T sub(T x, T y) { return x-y; }\n  template <typename T> T mul(T x, T y) { return x*y; }\n  template <typename T> T div(T x, T y) { return x/y; }\n  template <typename T> T neg(T x)      { return -x;  }\n  template <typename T, typename V> void store(T* d, V x) { *d = x;  }\n  template <typename T, typename V> void storeu(T* d, V x){ *d = x;  }\n  template <typename V, typename T> V load(const T* d) { return *d;  }\n  template <typename V, typename T> V loadu(const T* d){ return *d;  }\n  template <typename V, typename T> V set1(T x) { return x;   }\n  template <typename V> inline V set0() { return 0.0; };\n  template <typename T> T sqrt(T x) { return std::sqrt(x); }\n  \n  template <typename T> T hsum(T x) { return x; }\n  template <typename T> T hmul(T x) { return x; }\n  template <typename T> T hmin(T x) { return x; }\n  template <typename T> T hmax(T x) { return x; }\n  
\n  template <typename T> T fma(T x, T y, T z)  { return (x*y)+z; }\n  template <typename T> T fnma(T x, T y, T z) { return z-(x*y); }\n  template <typename T> T fmin(T x, T y)  { return std::min(x,y); }\n  template <typename T> T fmax(T x, T y)  { return std::max(x,y); }\n \n#if __cplusplus > 199711L\n  template <> inline float  fmin(float x, float y)   { return std::fmin(x,y); }\n  template <> inline double fmin(double x, double y) { return std::fmin(x,y); }\n  template <> inline float  fmax(float x, float y)   { return std::fmax(x,y); }\n  template <> inline double fmax(double x, double y) { return std::fmax(x,y); }\n#endif\n\n  inline float select_gt(float x1, float x2, float y1, float y2) {\n    if (x1 > x2) { return y1; } else { return y2; }\n  }\n  inline double select_gt(double x1, double x2, double y1, double y2) {\n    if (x1 > x2) { return y1; } else { return y2; }\n  }\n  \n  inline bool all_in_range(float x, float low_bound, float high_bound) {\n    return x >= low_bound && x <= high_bound;\n  }\n  inline bool all_in_range(double x, double low_bound, double high_bound) {\n    return x >= low_bound && x <= high_bound;\n  }\n  \n  // -------------------------------------------------------------------\n  // Macros to define mathematical operations\n  // -------------------------------------------------------------------\n\n  // Basic load store, arithmetic, sqrt, min and max\n#define QE_DEFINE_BASIC(TYPE, VEC, LOAD, LOADU, SET0, SET1,\t\\\n\t\t\tSTORE, STOREU, ADD, SUB, MUL, DIV,\t\\\n\t\t\tSQRT, FMIN, FMAX)\t\t\t\\\n  inline VEC add(VEC x, VEC y)       { return ADD(x, y); }\t\\\n  inline VEC sub(VEC x, VEC y)       { return SUB(x, y); }\t\\\n  inline VEC mul(VEC x, VEC y)       { return MUL(x, y); }\t\\\n  inline VEC div(VEC x, VEC y)       { return DIV(x, y); }\t\\\n  inline VEC neg(VEC x)              { return SUB(SET0(), x); }\t\\\n  template <> inline VEC set0<VEC>()        { return SET0();  }\t\\\n  template <> inline VEC set1<VEC>(TYPE x)  { return 
SET1(x); }\t\\\n  inline VEC sqrt(VEC x)             { return SQRT(x);   }\t\\\n  inline VEC fmin(VEC x, VEC y)      { return FMIN(x,y); }\t\\\n  inline VEC fmax(VEC x, VEC y)      { return FMAX(x,y); }\t\\\n  template <> inline VEC load<VEC,TYPE>(const TYPE* d)\t\t\\\n  { return LOAD(d);  }\t\t\t\t\t\t\\\n  template <> inline VEC loadu<VEC,TYPE>(const TYPE* d)         \\\n  { return LOADU(d); }\t\t\t\t\t\t\\\n  inline void store(TYPE* d, VEC x)  { STORE(d, x);      }\t\\\n  inline void storeu(TYPE* d, VEC x) { STOREU(d, x);     }\t\\\n  inline std::ostream& operator<<(std::ostream& os, VEC x) {\t\\\n    static const int size = sizeof(VEC)/sizeof(TYPE);\t\t\\\n    union { VEC v; TYPE d[size]; };\t\t\t\t\\\n    v = x; os << \"{\";\t\t\t\t\t\t\\\n    for (int i = 0; i < size; ++i)\t\t\t\t\\\n      { os << \" \" << d[i]; }\t\t\t\t\t\\\n    os << \"}\"; return os;\t\t\t\t\t\\\n  }\n  \n#define QE_DEFINE_CHOP(VEC, HALF_TYPE, LOW, HIGH, PACK)\t\t\\\n  inline HALF_TYPE low(VEC x)   { return LOW;       }\t\t\\\n  inline HALF_TYPE high(VEC x)  { return HIGH;      }\t\t\\\n  inline VEC pack(HALF_TYPE x, HALF_TYPE y) { return PACK; }\n  \n  // Reduction operations: horizontal sum, product, min and max\n#define QE_DEFINE_HORIZ(TYPE, VEC, HSUM, HMUL, HMIN, HMAX)\t\\\n  inline TYPE hsum(VEC x)            { return HSUM(x);   }\t\\\n  inline TYPE hmul(VEC x)            { return HMUL(x);   }\t\\\n  inline TYPE hmin(VEC x)            { return HMIN(x);   }\t\\\n  inline TYPE hmax(VEC x)            { return HMAX(x);   }\n\n  // Define fused multiply-add functions\n#define QE_DEFINE_FMA(TYPE, VEC, FMA, FNMA)\t\t\t\\\n  inline VEC fma(VEC x,VEC y,VEC z)  { return FMA(x,y,z); }\t\\\n  inline VEC fma(VEC x,TYPE y,VEC z)\t\t\t\t\\\n  { return FMA(x,set1<VEC>(y),z); }\t\t\t\t\\\n  inline VEC fma(TYPE x, VEC y, TYPE z)\t\t\t\t\\\n  { return FMA(set1<VEC>(x),y,set1<VEC>(z)); }\t\t\t\\\n  inline VEC fma(VEC x, VEC y, TYPE z)\t\t\t\t\\\n  { return FMA(x,y,set1<VEC>(z)); }\t\t\t\t\\\n  inline 
VEC fnma(VEC x,VEC y,VEC z) { return FNMA(x,y,z);}\n\n  // Alternative order of arguments for ARM NEON\n#define QE_DEFINE_FMA_ALT(TYPE, VEC, FMA, FNMA)\t\t\t\\\n  inline VEC fma(VEC x,VEC y,VEC z)  { return FMA(z,x,y); }\t\\\n  inline VEC fma(VEC x,TYPE y,VEC z)\t\t\t\t\\\n  { return FMA(z,x,set1<VEC>(y)); }\t\t\t\t\\\n  inline VEC fma(TYPE x, VEC y, TYPE z)\t\t\t\t\\\n  { return FMA(set1<VEC>(z),set1<VEC>(x),y); }\t\t\t\\\n  inline VEC fma(VEC x, VEC y, TYPE z)\t\t\t\t\\\n  { return FMA(set1<VEC>(z),x,y); }\t\t\t\t\\\n  inline VEC fnma(VEC x,VEC y,VEC z) { return FNMA(z,x,y);}\n  \n  // Emulate fused multiply-add if instruction not available\n#define QE_EMULATE_FMA(TYPE, VEC)\t\t\t\t\\\n  inline VEC fma(VEC x,VEC y,VEC z)  { return add(mul(x,y),z);}\t\\\n  inline VEC fma(VEC x,TYPE y,VEC z)\t\t\t\t\\\n  { return add(mul(x,set1<VEC>(y)),z); }\t\t\t\\\n  inline VEC fma(TYPE x, VEC y, TYPE z)\t\t\t\t\\\n  { return add(mul(set1<VEC>(x),y),set1<VEC>(z)); }\t\t\\\n  inline VEC fma(VEC x, VEC y, TYPE z)\t\t\t\t\\\n  { return add(mul(x,y),set1<VEC>(z)); }\t\t\t\\\n  inline VEC fnma(VEC x,VEC y,VEC z) { return sub(z,mul(x,y));}\n\n#define QE_DEFINE_POW2N_S(VEC, VECI, CASTTO, CASTBACK, SHIFTL,  \\\n\t\t\t  SETELEM)\t\t\t\t\\\n  inline VEC pow2n(VEC n) {\t\t\t\t\t\\\n    const float pow2_23 = 8388608.0;\t\t\t\t\\\n    const float bias = 127.0;\t\t\t\t\t\\\n    VEC  a = add(n, set1<VEC>(bias+pow2_23));\t\t\t\\\n    VECI b = CASTTO(a);\t\t\t\t\t\t\\\n    VECI c = SHIFTL(b, SETELEM(23));\t\t\t\t\\\n    VEC  d = CASTBACK(c);\t\t\t\t\t\\\n    return d;\t\t\t\t\t\t\t\\\n  }\n#define QE_DEFINE_POW2N_D(VEC, VECI, CASTTO, CASTBACK, SHIFTL,  \\\n\t\t\t  SETELEM)\t\t\t\t\\\n  inline VEC pow2n(VEC n) {\t\t\t\t\t\\\n    const double pow2_52 = 4503599627370496.0;\t\t\t\\\n    const double bias = 1023.0;\t\t\t\t\t\\\n    VEC  a = add(n, set1<VEC>(bias+pow2_52));\t\t\t\\\n    VECI b = CASTTO(a);\t\t\t\t\t\t\\\n    VECI c = SHIFTL(b, SETELEM(52));\t\t\t\t\\\n    VEC  d = 
CASTBACK(c);\t\t\t\t\t\\\n    return d;\t\t\t\t\t\t\t\\\n  }\n\n  // -------------------------------------------------------------------\n  // Define operations for SSE2: vector of 4 floats or 2 doubles\n  // -------------------------------------------------------------------\n  \n\n#ifdef __SSE2__\n  QE_DEFINE_BASIC(float, __m128, _mm_load_ps, _mm_loadu_ps,\n\t\t  _mm_setzero_ps, _mm_set1_ps, _mm_store_ps, _mm_storeu_ps,\n\t\t  _mm_add_ps, _mm_sub_ps, _mm_mul_ps, _mm_div_ps,\n\t\t  _mm_sqrt_ps, _mm_min_ps, _mm_max_ps)\n  QE_DEFINE_BASIC(double, __m128d, _mm_load_pd, _mm_loadu_pd,\n\t\t  _mm_setzero_pd, _mm_set1_pd, _mm_store_pd, _mm_storeu_pd,\n\t\t  _mm_add_pd, _mm_sub_pd, _mm_mul_pd, _mm_div_pd,\n\t\t  _mm_sqrt_pd, _mm_min_pd, _mm_max_pd)\n  // Don't define chop operations for __m128 because we don't have a\n  // container for two floats\n  QE_DEFINE_CHOP(__m128d, double, _mm_cvtsd_f64(x),\n\t\t _mm_cvtsd_f64(_mm_unpackhi_pd(x,x)),\n\t\t _mm_set_pd(y,x))\n\t\t \n  // No built-in horizontal operations for SSE2, so need to implement\n  // by hand\n#define QE_DEFINE_HORIZ_SSE2(FUNC, OP_PS, OP_SS, OP_PD)\t\t\t\\\n  inline float FUNC(__m128 x) {\t\t\t\t\t\t\\\n    __m128 shuf = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1));\t\\\n    __m128 sums = OP_PS(x, shuf);\t\t\t\t\t\\\n    shuf        = _mm_movehl_ps(shuf, sums);\t\t\t\t\\\n    return _mm_cvtss_f32(OP_SS(sums, shuf));\t\t\t\t\\\n  }\t\t\t\t\t\t\t\t\t\\\n  inline double FUNC(__m128d x) {\t\t\t\t\t\\\n    __m128 shuftmp= _mm_movehl_ps(QE_MM_UNDEFINED_PS(),\t\t\t\\\n\t\t\t\t  _mm_castpd_ps(x));\t\t\t\\\n    __m128d shuf  = _mm_castps_pd(shuftmp);\t\t\t\t\\\n    return  _mm_cvtsd_f64(OP_PD(x, shuf));\t\t\t\t\\\n  }\n  QE_DEFINE_HORIZ_SSE2(hsum, _mm_add_ps, _mm_add_ss, _mm_add_pd)\n  QE_DEFINE_HORIZ_SSE2(hmul, _mm_mul_ps, _mm_mul_ss, _mm_mul_pd)\n  QE_DEFINE_HORIZ_SSE2(hmin, _mm_min_ps, _mm_min_ss, _mm_min_pd)\n  QE_DEFINE_HORIZ_SSE2(hmax, _mm_max_ps, _mm_max_ss, _mm_max_pd)\n\n#undef QE_MM_UNDEFINED_PS\n#undef 
QE_DEFINE_HORIZ_SSE2\n  \n#ifdef __FMA__\n  QE_DEFINE_FMA(float, __m128, _mm_fmadd_ps, _mm_fnmadd_ps)\n  QE_DEFINE_FMA(double, __m128d, _mm_fmadd_pd, _mm_fnmadd_pd)\n#else\n  QE_EMULATE_FMA(float, __m128)\n  QE_EMULATE_FMA(double, __m128d)\n#endif\n#ifdef __SSE4_1__\n  inline __m128 unchecked_round(__m128 x)\n  { return _mm_round_ps(x, (_MM_FROUND_TO_NEAREST_INT\n\t\t\t      |_MM_FROUND_NO_EXC)); }\n  inline __m128d unchecked_round(__m128d x)\n  { return _mm_round_pd(x, (_MM_FROUND_TO_NEAREST_INT\n\t\t\t      |_MM_FROUND_NO_EXC)); }\n#else\n  // No native function available, but since the arguments are limited\n  // to +/- 700, we don't need to check for going out of bounds\n  inline __m128 unchecked_round(__m128 x)\n  { return _mm_cvtepi32_ps(_mm_cvtps_epi32(x)); }\n  inline __m128d unchecked_round(__m128d x)\n  { return _mm_cvtepi32_pd(_mm_cvtpd_epi32(x)); }\n\n#endif\n  inline float unchecked_round(float x)\n  { return _mm_cvtss_f32(unchecked_round(_mm_set_ss(x))); }\n  inline double unchecked_round(double x)\n  { return low(unchecked_round(_mm_set_sd(x))); }\n\n  QE_DEFINE_POW2N_S(__m128, __m128i, _mm_castps_si128,\n\t\t    _mm_castsi128_ps, _mm_sll_epi32, _mm_cvtsi32_si128)\n  QE_DEFINE_POW2N_D(__m128d, __m128i, _mm_castpd_si128,\n\t\t    _mm_castsi128_pd, _mm_sll_epi64, _mm_cvtsi32_si128)\n  inline float pow2n(float x)\n  { return _mm_cvtss_f32(pow2n(quick_e::set1<__m128>(x))); }\n  inline double pow2n(double x)\n  { return low(pow2n(quick_e::set1<__m128d>(x))); }\n\n  \n  inline bool horiz_and(__m128i a) {\n#ifdef __SSE4_1__\n    return _mm_testc_si128(a, _mm_set1_epi32(-1)) != 0;\n#else\n    __m128i t1 = _mm_unpackhi_epi64(a, a); // get 64 bits down\n    __m128i t2 = _mm_and_si128(a, t1);     // and 64 bits\n#ifdef __x86_64__\n    int64_t t5 = _mm_cvtsi128_si64(t2);    // transfer 64 bits to integer\n    return  t5 == int64_t(-1);\n#else\n    __m128i t3 = _mm_srli_epi64(t2, 32);   // get 32 bits down\n    __m128i t4 = _mm_and_si128(t2, t3);    // and 32 
bits\n    int     t5 = _mm_cvtsi128_si32(t4);    // transfer 32 bits to integer\n    return  t5 == -1;\n#endif  // __x86_64__\n#endif  // SSE 4.1\n  }\n  inline bool all_in_range(__m128 x, float low_bound, float high_bound) {\n    return horiz_and(_mm_castps_si128(_mm_and_ps(\n\t\t\t _mm_cmpge_ps(x,set1<__m128>(low_bound)),\n\t\t\t _mm_cmple_ps(x,set1<__m128>(high_bound)))));\n  }\n  inline bool all_in_range(__m128d x, double low_bound, double high_bound) {\n    return horiz_and(_mm_castpd_si128(_mm_and_pd(\n\t\t\t _mm_cmpge_pd(x,set1<__m128d>(low_bound)),\n\t\t\t _mm_cmple_pd(x,set1<__m128d>(high_bound)))));\n  }\n\n  // If x1 > x2, select y1, or select y2 otherwise\n  inline __m128 select_gt(__m128 x1, __m128 x2,\n\t\t\t  __m128 y1, __m128 y2) {\n    __m128 mask = _mm_cmpgt_ps(x1,x2);\n#ifdef __SSE4_1__\n    return _mm_blendv_ps(y2, y1, mask);\n#else\n    return _mm_or_ps(_mm_and_ps(mask, y1),\n\t\t     _mm_andnot_ps(mask, y2));\n#endif\n  }\n  inline __m128d select_gt(__m128d x1, __m128d x2,\n\t\t\t   __m128d y1, __m128d y2) {\n    __m128d mask = _mm_cmpgt_pd(x1,x2);\n#ifdef __SSE4_1__\n    return _mm_blendv_pd(y2, y1, mask);\n#else\n    return _mm_or_pd(_mm_and_pd(mask, y1),\n\t\t     _mm_andnot_pd(mask, y2));\n#endif\n  }\n#endif\n\n  // -------------------------------------------------------------------\n  // Define operations for AVX: vector of 8 floats or 4 doubles\n  // -------------------------------------------------------------------\n#ifdef __AVX__\n  QE_DEFINE_BASIC(float, __m256, _mm256_load_ps, _mm256_loadu_ps,\n\t\t  _mm256_setzero_ps, _mm256_set1_ps,\n\t\t  _mm256_store_ps, _mm256_storeu_ps,\n\t\t  _mm256_add_ps, _mm256_sub_ps,\n\t\t  _mm256_mul_ps, _mm256_div_ps, _mm256_sqrt_ps,\n\t\t  _mm256_min_ps, _mm256_max_ps)\n  QE_DEFINE_BASIC(double, __m256d, _mm256_load_pd, _mm256_loadu_pd,\n\t\t  _mm256_setzero_pd, _mm256_set1_pd,\n\t\t  _mm256_store_pd, _mm256_storeu_pd,\n\t\t  _mm256_add_pd, _mm256_sub_pd,\n\t\t  _mm256_mul_pd, _mm256_div_pd, 
_mm256_sqrt_pd,\n\t\t  _mm256_min_pd, _mm256_max_pd)\n  QE_DEFINE_CHOP(__m256, __m128,\n\t\t _mm256_castps256_ps128(x), _mm256_extractf128_ps(x,1),\n\t\t _mm256_permute2f128_ps(_mm256_castps128_ps256(x),\n\t\t\t\t\t_mm256_castps128_ps256(y), 0x20))\n  QE_DEFINE_CHOP(__m256d, __m128d, _mm256_castpd256_pd128(x),\n\t\t _mm256_extractf128_pd(x,1),\n\t\t _mm256_permute2f128_pd(_mm256_castpd128_pd256(x),\n\t\t\t\t\t_mm256_castpd128_pd256(y), 0x20));\n\n  // Implement by calling SSE2 h* functions\n  inline float  hsum(__m256 x)  { return hsum(add(low(x), high(x))); }\n  inline float  hmul(__m256 x)  { return hmul(mul(low(x), high(x))); }\n  inline float  hmin(__m256 x)  { return hmin(fmin(low(x), high(x))); }\n  inline float  hmax(__m256 x)  { return hmax(fmax(low(x), high(x))); }\n  inline double hsum(__m256d x) { return hsum(add(low(x),  high(x))); } // Alternative would be to use _mm_hadd_pd\n  inline double hmul(__m256d x) { return hmul(mul(low(x),  high(x))); }\n  inline double hmin(__m256d x) { return hmin(fmin(low(x), high(x))); }\n  inline double hmax(__m256d x) { return hmax(fmax(low(x), high(x))); }\n  \n  // Define extras\n#ifdef __FMA__\n  QE_DEFINE_FMA(float, __m256,  _mm256_fmadd_ps, _mm256_fnmadd_ps)\n  QE_DEFINE_FMA(double, __m256d, _mm256_fmadd_pd, _mm256_fnmadd_pd)\n#else\n  QE_EMULATE_FMA(float, __m256)\n  QE_EMULATE_FMA(double, __m256d)\n#endif\n  \n  inline __m256 unchecked_round(__m256 x)\n  { return _mm256_round_ps(x, (_MM_FROUND_TO_NEAREST_INT\n\t\t\t       |_MM_FROUND_NO_EXC)); }\n  inline __m256d unchecked_round(__m256d x)\n  { return _mm256_round_pd(x, (_MM_FROUND_TO_NEAREST_INT\n\t\t\t       |_MM_FROUND_NO_EXC)); }\n  #ifdef __AVX2__\n    QE_DEFINE_POW2N_S(__m256, __m256i, _mm256_castps_si256,\n\t\t      _mm256_castsi256_ps, _mm256_sll_epi32, _mm_cvtsi32_si128)\n    QE_DEFINE_POW2N_D(__m256d, __m256i, _mm256_castpd_si256,\n\t\t      _mm256_castsi256_pd, _mm256_sll_epi64, _mm_cvtsi32_si128)\n  #else\n    // Suboptimized versions call the SSE2 
functions on the upper and\n    // lower parts\n    inline __m256 pow2n(__m256 n) {\n      return pack(pow2n(low(n)), pow2n(high(n)));\n    }\n    inline __m256d pow2n(__m256d n) {\n      return pack(pow2n(low(n)), pow2n(high(n)));\n    }\n  #endif\n \n  // Return true if all elements of x are in the range (inclusive) of\n  // low_bound to high_bound.  If so the exp call can exit before the\n  // more costly case of working out what to do with inputs out of\n  // bounds.  Note that _CMP_GE_OS means compare\n  // greater-than-or-equal-to, ordered, signaling, where \"ordered\"\n  // means that if either operand is NaN, the result is false.\n  inline bool all_in_range(__m256 x, float low_bound, float high_bound) {\n    return _mm256_testc_si256(_mm256_castps_si256(_mm256_and_ps(\n\t\t _mm256_cmp_ps(x,set1<__m256>(low_bound), _CMP_GE_OS),\n\t\t _mm256_cmp_ps(x,set1<__m256>(high_bound), _CMP_LE_OS))),\n\t\t\t      _mm256_set1_epi32(-1)) != 0;\n  }\n  inline bool all_in_range(__m256d x, double low_bound, double high_bound) {\n    return _mm256_testc_si256(_mm256_castpd_si256(_mm256_and_pd(\n\t\t _mm256_cmp_pd(x,set1<__m256d>(low_bound), _CMP_GE_OS),\n\t\t _mm256_cmp_pd(x,set1<__m256d>(high_bound), _CMP_LE_OS))),\n\t\t\t      _mm256_set1_epi32(-1)) != 0;\n  }\n  inline __m256 select_gt(__m256 x1, __m256 x2,\n\t\t\t  __m256 y1, __m256 y2) {\n    return _mm256_blendv_ps(y2, y1, _mm256_cmp_ps(x1,x2,_CMP_GT_OS));\n  }\n  inline __m256d select_gt(__m256d x1, __m256d x2,\n\t\t\t   __m256d y1, __m256d y2) {\n    return _mm256_blendv_pd(y2, y1, _mm256_cmp_pd(x1,x2,_CMP_GT_OS));\n  }\n\n#endif\n  \n\n  // -------------------------------------------------------------------\n  // Define operations for AVX512: vector of 16 floats or 8 doubles\n  // -------------------------------------------------------------------\n#ifdef __AVX512F__\n  QE_DEFINE_BASIC(float, __m512, _mm512_load_ps, _mm512_loadu_ps,\n\t\t  _mm512_setzero_ps, _mm512_set1_ps,\n\t\t  _mm512_store_ps, 
_mm512_storeu_ps,\n\t\t  _mm512_add_ps, _mm512_sub_ps,\n\t\t  _mm512_mul_ps, _mm512_div_ps, _mm512_sqrt_ps,\n\t\t  _mm512_min_ps, _mm512_max_ps)\n  QE_DEFINE_HORIZ(float, __m512,\n\t\t  _mm512_reduce_add_ps, _mm512_reduce_mul_ps,\n\t\t  _mm512_reduce_min_ps, _mm512_reduce_max_ps)\n  QE_DEFINE_BASIC(double, __m512d, _mm512_load_pd, _mm512_loadu_pd,\n\t\t  _mm512_setzero_pd, _mm512_set1_pd,\n\t\t  _mm512_store_pd, _mm512_storeu_pd,\n\t\t  _mm512_add_pd, _mm512_sub_pd,\n\t\t  _mm512_mul_pd, _mm512_div_pd, _mm512_sqrt_pd,\n\t\t  _mm512_min_pd, _mm512_max_pd)\n  QE_DEFINE_HORIZ(double, __m512d,\n\t\t  _mm512_reduce_add_pd, _mm512_reduce_mul_pd,\n\t\t  _mm512_reduce_min_pd, _mm512_reduce_max_pd)\n  \n  inline __m512 unchecked_round(__m512 x)   { return _mm512_roundscale_ps(x, 0); }\n  inline __m512d unchecked_round(__m512d x) { return _mm512_roundscale_pd(x, 0); }\n\n  QE_DEFINE_FMA(float, __m512,  _mm512_fmadd_ps, _mm512_fnmadd_ps)\n  QE_DEFINE_FMA(double, __m512d, _mm512_fmadd_pd, _mm512_fnmadd_pd)\n  \n  QE_DEFINE_POW2N_S(__m512, __m512i, _mm512_castps_si512,\n\t\t    _mm512_castsi512_ps, _mm512_sll_epi32, _mm_cvtsi32_si128)\n  QE_DEFINE_POW2N_D(__m512d, __m512i, _mm512_castpd_si512,\n\t\t    _mm512_castsi512_pd, _mm512_sll_epi64, _mm_cvtsi32_si128)\n\n  inline bool all_in_range(__m512 x, float low_bound, float high_bound) {\n    return static_cast<unsigned short int>(_mm512_kand(\n\t      _mm512_cmp_ps_mask(x,set1<__m512>(low_bound),_CMP_GE_OS),\n\t      _mm512_cmp_ps_mask(x,set1<__m512>(high_bound),_CMP_LE_OS)))\n      == static_cast<unsigned short int>(65535);\n  }\n  inline bool all_in_range(__m512d x, double low_bound, double high_bound) {\n    return static_cast<unsigned short int>(_mm512_kand(\n\t      _mm512_cmp_pd_mask(x,set1<__m512d>(low_bound),_CMP_GE_OS),\n\t      _mm512_cmp_pd_mask(x,set1<__m512d>(high_bound),_CMP_LE_OS)))\n      == static_cast<unsigned short int>(255);\n  }\n  inline __m512 select_gt(__m512 x1, __m512 x2,\n\t\t\t  __m512 y1, __m512 y2) 
{\n    return _mm512_mask_mov_ps(y2, _mm512_cmp_ps_mask(x1,x2,_CMP_GT_OS), y1);\n  }\n  inline __m512d select_gt(__m512d x1, __m512d x2,\n\t\t\t   __m512d y1, __m512d y2) {\n    return _mm512_mask_mov_pd(y2, _mm512_cmp_pd_mask(x1,x2,_CMP_GT_OS), y1);\n  }\n\n#endif\n\n  \n#ifdef QE_HAVE_ARM64_NEON\n\n  // Implement ARM version of x86 setzero\n  inline float32x4_t vzeroq_f32() { return vdupq_n_f32(0.0); }\n  inline float64x2_t vzeroq_f64() { return vdupq_n_f64(0.0); }\n  // Horizontal multiply across vector\n  inline float vmulvq_f32(float32x4_t x) {\n    union {\n      float32x2_t v;\n      float data[2];\n    };\n    v = vmul_f32(vget_low_f32(x), vget_high_f32(x));\n    return data[0] * data[1];\n  }\n  inline double vmulvq_f64(float64x2_t x) {\n    union {\n      float64x2_t v;\n      double data[2];\n    };\n    v = x;\n    return data[0] * data[1];\n  }\n  \n  QE_DEFINE_BASIC(float, float32x4_t, vld1q_f32, vld1q_f32,\n\t\t  vzeroq_f32, vdupq_n_f32, vst1q_f32, vst1q_f32,\n\t\t  vaddq_f32, vsubq_f32, vmulq_f32, vdivq_f32,\n\t\t  vsqrtq_f32, vminq_f32, vmaxq_f32)\n  QE_DEFINE_HORIZ(float, float32x4_t,\n\t\t  vaddvq_f32, vmulvq_f32,\n\t\t  vminvq_f32, vmaxvq_f32)\n  QE_DEFINE_BASIC(double, float64x2_t, vld1q_f64, vld1q_f64,\n\t\t  vzeroq_f64, vdupq_n_f64, vst1q_f64, vst1q_f64,\n\t\t  vaddq_f64, vsubq_f64, vmulq_f64, vdivq_f64,\n\t\t  vsqrtq_f64, vminq_f64, vmaxq_f64)\n  QE_DEFINE_HORIZ(double, float64x2_t,\n\t\t  vaddvq_f64, vmulvq_f64,\n\t\t  vminvq_f64, vmaxvq_f64)\n  QE_DEFINE_POW2N_S(float32x4_t, int32x4_t, vreinterpretq_s32_f32,\n\t\t    vreinterpretq_f32_s32, vshlq_s32, vdupq_n_s32)\n  QE_DEFINE_POW2N_D(float64x2_t, int64x2_t, vreinterpretq_s64_f64,\n\t\t    vreinterpretq_f64_s64, vshlq_s64, vdupq_n_s64)\n  QE_DEFINE_FMA_ALT(float, float32x4_t, vfmaq_f32, vfmsq_f32)\n  QE_DEFINE_FMA_ALT(double, float64x2_t, vfmaq_f64, vfmsq_f64)\n  inline bool all_in_range(float32x4_t x, double low_bound, double high_bound) {\n    union {\n      uint32x2_t v;\n      uint32_t 
data[2];\n    };\n    uint32x4_t tmp = vandq_u32(vcgeq_f32(x,vdupq_n_f32(low_bound)),\n\t\t\t       vcleq_f32(x,vdupq_n_f32(high_bound)));\n    v = vand_u32(vget_low_u32(tmp), vget_high_u32(tmp));\n    return data[0] && data[1];\n  }\n  inline bool all_in_range(float64x2_t x, double low_bound, double high_bound) {\n    union {\n      uint64x2_t v;\n      uint64_t data[2];\n    };\n    v = vandq_u64(vcgeq_f64(x,vdupq_n_f64(low_bound)),\n\t\t  vcleq_f64(x,vdupq_n_f64(high_bound)));\n    return data[0] && data[1];\n  }\n\n  inline float32x4_t unchecked_round(float32x4_t x) {\n    return vcvtq_f32_s32(vcvtaq_s32_f32(x));\n  }\n  inline float64x2_t unchecked_round(float64x2_t x) {\n    return vcvtq_f64_s64(vcvtaq_s64_f64(x));\n  }\n  inline float32x4_t select_gt(float32x4_t x1, float32x4_t x2,\n\t\t\t       float32x4_t y1, float32x4_t y2) {\n    return vbslq_f32(vcgtq_f32(x1,x2), y1, y2);\n  }\n  inline float64x2_t select_gt(float64x2_t x1, float64x2_t x2,\n\t\t\t       float64x2_t y1, float64x2_t y2) {\n    return vbslq_f64(vcgtq_f64(x1,x2), y1, y2);\n  }\n\n  inline float unchecked_round(float x)\n  { return vgetq_lane_f32(unchecked_round(vdupq_n_f32(x)), 0); }\n  inline double unchecked_round(double x)\n  { return vgetq_lane_f64(unchecked_round(vdupq_n_f64(x)), 0); }\n\n  inline float pow2n(float x) {\n    return vgetq_lane_f32(pow2n(vdupq_n_f32(x)),0);\n  }\n  inline double pow2n(double x) {\n    return vgetq_lane_f64(pow2n(vdupq_n_f64(x)),0);\n  }\n\n#endif\n \n  \n#ifdef QE_HAVE_FAST_EXP\n  \n  // -------------------------------------------------------------------\n  // Implementation of fast exponential\n  // -------------------------------------------------------------------\n\n  template<typename Type, typename Vec>\n  static inline\n  Vec polynomial_5(Vec const x, Type c0, Type c1, Type c2, Type c3, Type c4, Type c5) {\n    // calculates polynomial c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0\n    using quick_e::fma;\n    Vec x2 = mul(x, x);\n    Vec x4 = 
mul(x2, x2);\n    return fma(fma(c3, x, c2), x2, fma(fma(c5, x, c4), x4, fma(c1, x, c0)));\n  }\n\n  template<typename Vec>\n  inline\n  Vec fastexp_float(Vec const initial_x) {\n    using namespace quick_e;\n    using quick_e::unchecked_round;\n    using quick_e::fma;\n    \n    // Taylor coefficients\n    const float P0expf   =  1.f/2.f;\n    const float P1expf   =  1.f/6.f;\n    const float P2expf   =  1.f/24.f;\n    const float P3expf   =  1.f/120.f; \n    const float P4expf   =  1.f/720.f; \n    const float P5expf   =  1.f/5040.f; \n    const float VM_LOG2E = 1.44269504088896340736;  // 1/log(2)\n    const float ln2f_hi  =  0.693359375f;\n    const float ln2f_lo  = -2.12194440e-4f;\n#ifndef __FAST_MATH__\n    const float min_x    = -87.3f;\n    const float max_x    = +89.0f;\n#endif\n\n    Vec r = unchecked_round(mul(initial_x,set1<Vec>(VM_LOG2E)));\n    Vec x = fnma(r, set1<Vec>(ln2f_hi), initial_x); //  x -= r * ln2f_hi;\n    x = fnma(r, set1<Vec>(ln2f_lo), x);             //  x -= r * ln2f_lo;\n \n    Vec z = polynomial_5(x,P0expf,P1expf,P2expf,P3expf,P4expf,P5expf);\n\n    Vec x2 = mul(x, x);\n    z = fma(z, x2, x);                       // z *= x2;  z += x;\n\n    // multiply by power of 2 \n    Vec n2 = pow2n(r);\n\n    z = fma(z,n2,n2);\n    \n#ifdef __FAST_MATH__\n    return z;\n#else\n    if (all_in_range(initial_x, min_x, max_x)) {\n      return z;\n    }\n    else {\n      // When initial_x<-87.3, set exp(x) to -Inf\n      z = select_gt(set1<Vec>(min_x), initial_x, set0<Vec>(), z);\n      // When initial_x>+89.0, set exp(x) to +Inf\n      z = select_gt(initial_x, set1<Vec>(max_x),\n\t\t    set1<Vec>(std::numeric_limits<float>::infinity()),\n\t\t    z);\n      return z;\n    }\n#endif\n  }\n\n\n  template <typename Type, typename Vec>\n  Vec polynomial_13m(Vec const x,\n\t\t     Type c2, Type c3, Type c4, Type c5, Type c6, Type c7,\n\t\t     Type c8, Type c9, Type c10, Type c11, Type c12, Type c13) {\n    // calculates polynomial c13*x^13 + c12*x^12 
+ ... + x + 0\n    using quick_e::fma;\n    \n    Vec x2 = mul(x, x);\n    Vec x4 = mul(x2, x2);\n    //    Vec x8 = mul(x4, x4);\n    return fma(fma(fma(c13, x, c12), x4,\n\t\t   fma(fma(c11, x, c10), x2, fma(c9, x, c8))), mul(x4, x4),\n\t       fma(fma(fma(c7, x, c6), x2, fma(c5, x, c4)), x4,\n\t\t   fma(fma(c3, x, c2), x2, x)));\n    //return fma(fma(fma(fma(fma(fma(fma(fma(fma(fma(fma(fma(c13, x, c12), x, c11), x, c10), x, c9), x, c8), x, c7), x, c6), x, c5), x, c4), x, c3), x, c2), mul(x,x), x);\n    \n  }\n\n  \n  // Template function implementing the fast exponential, where Vec\n  // can be double, __m128d, __m256d or __m512d\n  template <typename Vec>\n  inline\n  Vec fastexp_double(Vec const initial_x) {\n    using namespace quick_e;\n    using quick_e::unchecked_round;\n    using quick_e::fma;\n    \n    const double p2  = 1./2.;\n    const double p3  = 1./6.;\n    const double p4  = 1./24.;\n    const double p5  = 1./120.; \n    const double p6  = 1./720.; \n    const double p7  = 1./5040.; \n    const double p8  = 1./40320.; \n    const double p9  = 1./362880.; \n    const double p10 = 1./3628800.; \n    const double p11 = 1./39916800.; \n    const double p12 = 1./479001600.; \n    const double p13 = 1./6227020800.; \n    const double VM_LOG2E = 1.44269504088896340736;  // 1/log(2)\n    const double ln2d_hi = 0.693145751953125;\n    const double ln2d_lo = 1.42860682030941723212E-6;\n#ifndef __FAST_MATH__\n    const double min_x = -708.39;\n    const double max_x = +709.70;\n#endif\n\n    Vec r = unchecked_round(mul(initial_x,set1<Vec>(VM_LOG2E)));\n    // subtraction in two steps for higher precision\n    Vec x = fnma(r, set1<Vec>(ln2d_hi), initial_x);   //  x -= r * ln2d_hi;\n    x = fnma(r, set1<Vec>(ln2d_lo), x);               //  x -= r * ln2d_lo;\n\n    // multiply by power of 2 \n    Vec n2 = pow2n(r);\n    \n    Vec z = polynomial_13m(x, p2, p3, p4, p5, p6, p7,\n\t\t\t   p8, p9, p10, p11, p12, p13);\n    z = fma(z,n2,n2);\n#ifdef __FAST_MATH__\n  
  return z;\n#else\n    if (all_in_range(initial_x, min_x, max_x)) {\n      // Fast normal path\n      return z;\n    }\n    else {\n      // When initial_x<-708.39, set exp(x) to 0.0\n      z = select_gt(set1<Vec>(min_x), initial_x, set0<Vec>(), z);\n      // When initial_x>+709.70, set exp(x) to +Inf\n      z = select_gt(initial_x, set1<Vec>(max_x),\n\t\t    set1<Vec>(std::numeric_limits<double>::infinity()),\n\t\t    z);\n      return z;\n    }\n#endif\n  }\n#endif\n  \n\n  // Define the various overloads for the quick_e::exp function taking\n  // Intel intrinsics as an argument\n\n#ifdef __SSE2__\n  inline __m128  exp(__m128 x)  { return fastexp_float(x);  }\n  inline __m128d exp(__m128d x) { return fastexp_double(x); }\n#endif\n\n#ifdef __AVX__\n  inline __m256  exp(__m256 x)  { return fastexp_float(x);  }\n  inline __m256d exp(__m256d x) { return fastexp_double(x); }\n#endif\n\n#ifdef __AVX512F__\n  inline __m512  exp(__m512 x)  { return fastexp_float(x);  }\n  inline __m512d exp(__m512d x) { return fastexp_double(x); }\n#endif\n\n#ifdef QE_HAVE_ARM64_NEON\n  inline float32x4_t exp(float32x4_t x) { return fastexp_float(x);  }\n  inline float64x2_t exp(float64x2_t x) { return fastexp_double(x); }\n#endif\n\n  // Define the quick_e::exp function for scalar arguments\n#ifdef QE_HAVE_FAST_EXP\n  inline float  exp(float x)  { return quick_e::fastexp_float(x); }\n  inline double exp(double x) { return quick_e::fastexp_double(x); }\n#else\n  // If no vectorization available then we fall back to the standard\n  // library scalar version\n  inline float  exp(float x)  { return std::exp(x); }\n  inline double exp(double x) { return std::exp(x); }\n#endif\n\n#undef QE_DEFINE_TRAITS\n#undef QE_DEFINE_LONGEST\n#undef QE_DEFINE_BASIC\n#undef QE_DEFINE_CHOP\n#undef QE_DEFINE_HORIZ\n#undef QE_DEFINE_FMA\n#undef QE_DEFINE_FMA_ALT\n#undef QE_EMULATE_FMA\n#undef QE_DEFINE_POW2N_S\n#undef QE_DEFINE_POW2N_D\n#undef QE_HAVE_FAST_EXP\n#undef QE_HAVE_ARM64_NEON\n}\n\n#endif\n"
  },
  {
    "path": "include/adept/reduce.h",
    "content": "/* reduce.h -- \"Reduce\" functions such as find, all, sum etc.\n\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   This file implements a number of array functions whose return\n   values are reduced in either rank or size compared to their\n   arguments.\n\n   The first is the \"find\" function that takes a rank-1 bool\n   Expression, and returns an IntVector of indices to the \"true\"\n   values.  This is modelled on Matlab's \"find\" function.\n\n   A number of further reduce functions are implemented using the same\n   calling style as the equivalent Fortran-90 functions.  They fall\n   into two types:\n     1. sum, mean, product, minval, maxval, norm2\n     2. all, any\n   The first take active or inactive Expression arguments of real or\n   (sometimes) integer type, while the second only take inactive\n   Expressions of bool type.  If called with one Expression argument\n   of any rank, a single value is returned containing the result of\n   the reduce operation on all the elements of the Expression.  If a\n   second integer argument is provided then the operation is carried\n   out along that dimension and an Expression of rank one less than\n   the first argument is returned. These functions are implemented by\n   delegating to a generic \"Reduce\" function that uses policy classes\n   to implement the elemental operations.\n\n*/\n\n#ifndef AdeptReduce_H\n#define AdeptReduce_H\n\n#include <limits>\n#include <algorithm>\n\n#include <adept/Array.h>\n#include <adept/Active.h>\n#include <adept/SpecialMatrix.h>\n#include <adept/array_shortcuts.h>\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Section 1. 
\"find\"\n  // -------------------------------------------------------------------\n  // This function takes a rank-1 bool Expression, and returns an\n  // IntVector of indices to the \"true\" values.\n  template <class E>\n  inline\n  typename internal::enable_if<E::rank == 1,IntVector>::type\n  find(const Expression<bool, E>& rhs)\n  {\n    ExpressionSize<1> length;\n    // Check the argument of the function is a valid expression\n    if (!rhs.get_dimensions(length)) {\n      std::string str = \"Array size mismatch in \"\n\t+ rhs.expression_string() + \".\";\n      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n    }\n    // Length of the rank-1 expression\n    Index& len = length[0];\n    // Allocate a return vector of the same length as the expression\n    // in case all values are true\n    IntVector ans(len);\n    // Keep track of the actual number of true values\n    Index true_len = 0;\n    // Get location of first value in expression\n    ExpressionSize<1> coords(0);\n    ExpressionSize<E::n_arrays> loc;\n    rhs.set_location(coords, loc);\n    // Loop over all values in the expression\n    for (int i = 0; i < len; i++) {\n      if (rhs.next_value(loc)) {\n\tans(true_len++) = i;\n      }\n    }\n    if (true_len == 0) {\n      // No values are \"true\": return an empty vector\n      return IntVector();\n    }\n    else if (true_len < len) {\n      // Some values are \"true\": return the part of the \"ans\" vector\n      // that contains indices to these values.  
Note that the\n      // following subsetting operation links to the original data\n      // rather than copying it.\n      return ans(range(0,true_len-1));\n    }\n    else {\n      // All values are \"true\": return the entire vector.\n      return ans;\n    }\n  }\n\n  namespace internal {\n\n    // For minval and maxval to work we need starting values for the accumulation\n    template <typename T, class Enable = void>\n    struct numeric_limits { };\n\n    template <typename T>\n    struct numeric_limits<T, typename internal::enable_if<!std::numeric_limits<T>::has_infinity>::type> {\n      static T min_inf() { return std::numeric_limits<T>::min(); }\n      static T max_inf() { return std::numeric_limits<T>::max(); }\n    };\n    template <typename T>\n    struct numeric_limits<T, typename internal::enable_if<std::numeric_limits<T>::has_infinity>::type> {\n      static T min_inf() { return -std::numeric_limits<T>::infinity(); }\n      static T max_inf() { return  std::numeric_limits<T>::infinity(); }\n    };\n\n\n    // -------------------------------------------------------------------\n    // Section 2. 
Policy classes to enable the generic \"reduce\" function\n    // -------------------------------------------------------------------\n\n    // Sum enables the \"sum\" function that sums its arguments.\n    template <typename T>\n    struct Sum {\n      // What is the type of the running total?\n      typedef T total_type;\n      // Number of extra operations per element, needed for reserving\n      // space in active calculations\n      static const int extra_element_cost = 0;\n      // Do we need to do anything to the final summed value(s)?\n      static const bool finish_needed = false;\n      // Do we need to do anything to the final summed value(s) in the\n      // case that we are doing automatic differentiation?\n      static const bool active_finish_needed = true;\n      // Used by \"expression_string()\"\n      const char* name() { return \"sum\"; }\n      // Start the accumulation with zero\n      T first_value() { return 0; }\n      // Accumulation consists of incrementing \"total\" by the value on\n      // the right hand side; note that the arguments are either of\n      // type T or type Packet<T>\n      template <typename E>\n      void accumulate(E& total, const E& rhs) { total += rhs; }\n      // When the reduce operation is vectorized, packets of data are\n      // accumulated, requiring the ability to horizontally accumulate\n      // each element of the packet, but only the packet2 version is\n      // needed (the original accumulate_packet had problems with the\n      // norm2 function, and is no longer used - can be removed)\n      //T accumulate_packet(const Packet<T>& ptotal) {\n      //  return hsum(ptotal);\n      //}\n      template <typename E>\n      void accumulate_packet2(E& total, const Packet<T>& ptotal) {\n\ttotal += hsum(ptotal);\n      }\n      // In the case of active arguments, the next_value_and_gradient\n      // function pushes the right hand side onto the operation stack,\n      // but does not push the \"total\" object onto 
the statement\n      // stack.  This is done right at the end of the summation\n      // operations.\n      template <class E, int NArrays>\n      void accumulate_active(Active<T>& total, const E& rhs, \n\t\t\t     ExpressionSize<NArrays>& loc) {\n\ttotal.lvalue() += rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc);\n      }\n      // No need to do anything to the final value\n      template <class X>\n      void finish(X& total, const Index& n) { }\n      // In the active case, the final action is to complete the\n      // storage of the differential statement by pushing the left\n      // hand side onto the statement stack.\n      void finish_active(Active<T>& total, const Index& n) { \n\tADEPT_ACTIVE_STACK->push_lhs(total.gradient_index());\n      }\n    };\n\n    // Mean enables the \"mean\" function - the same as \"sum\" but\n    // dividing the final result by the number of elements averaged.\n    template <typename T>\n    struct Mean {\n      typedef T total_type;\n      static const int extra_element_cost = 0;\n      static const bool finish_needed = true;\n      static const bool active_finish_needed = true;\n      const char* name() { return \"mean\"; }\n      T first_value() { return 0; }\n      template <typename E>\n      void accumulate(E& total, const E& rhs) { total += rhs; }\n      //T accumulate_packet(const Packet<T>& ptotal) {\n      //  return hsum(ptotal);\n      //}\n      template <typename E>\n      void accumulate_packet2(E& total, const Packet<T>& ptotal) {\n\ttotal += hsum(ptotal);\n      }\n      template <class E, int NArrays>\n      void accumulate_active(Active<T>& total, const E& rhs, \n\t\t\t     ExpressionSize<NArrays>& loc) {\n\ttotal.lvalue() += rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc);\n      }\n      template <class X>\n      // Divide by the total number of elements\n      void finish(X& total, const Index& n) { total /= n; }\n      void finish_active(Active<T>& total, const Index& n) { 
\n\tADEPT_ACTIVE_STACK->push_lhs(total.gradient_index());\n\ttotal /= n;\n      }\n    };\n\n    // Product enables the \"product\" function that multiplies all its\n    // arguments together.\n    template <typename T>\n    struct Product {\n      typedef T total_type;\n      static const int extra_element_cost = 1;\n      static const bool finish_needed = false;\n      static const bool active_finish_needed = false;\n      const char* name() { return \"product\"; }\n      T first_value() { return 1; }\n      template <typename E>\n      void accumulate(E& total, const E& rhs) { total *= rhs; }\n      //T accumulate_packet(const Packet<T>& ptotal) {\n      //  return hprod(ptotal);\n      //}\n      template <typename E>\n      void accumulate_packet2(E& total, const Packet<T>& ptotal) {\n\ttotal *= hprod(ptotal);\n      }\n      template <class E, int NArrays>\n      void accumulate_active(Active<T>& total, const E& rhs, \n\t\t\t     ExpressionSize<NArrays>& loc) {\n\t// Differentiate t = t*x -> dt = t*dx + x*dt.  
First compute\n\t// x, while passing t as the last argument so that t*dx is put\n\t// on the operation stack.\n\tT xval = rhs.next_value_and_gradient_special(*ADEPT_ACTIVE_STACK, loc,\n\t\t\t\t\t\t     total.value());\n\t// Now treat x as inactive and Active<T> will do the rest\n\ttotal *= xval;\n      }\n      template <class X>\n      void finish(X& total, const Index& n) { }\n      void finish_active(Active<T>& total, const Index& n) { }\n    };\n\n    // MaxVal enables the \"maxval\" function that returns the maximum value\n    template <typename T>\n    struct MaxVal {\n      typedef T total_type;\n      static const int extra_element_cost = 0;\n      static const bool finish_needed = false;\n      static const bool active_finish_needed = false;\n      const char* name() { return \"maxval\"; }\n      // Initiate the total with the minimum possible value\n      T first_value() { return internal::numeric_limits<T>::min_inf(); }\n#ifdef ADEPT_CXX11_FEATURES\n      void accumulate(T& total, const T& rhs) { \n\tusing std::fmax;\n\ttotal = fmax(total,rhs);\n      }\n      template <typename E>\n      void accumulate_packet2(E& total, const Packet<T>& ptotal) {\n\tusing std::fmax;\n\ttotal = fmax(total,hmax(ptotal));\n      }\n#else\n      void accumulate(T& total, const T& rhs) {\n\tusing std::max;\n\ttotal = max(total,rhs);\n      }\n      template <typename E>\n      void accumulate_packet2(E& total, const Packet<T>& ptotal) {\n\tusing std::max;\n\ttotal = max(total,hmax(ptotal));\n      }\n#endif\n      void accumulate(Packet<T>& total, const Packet<T>& rhs) { total = fmax(total,rhs); }\n      //T accumulate_packet(const Packet<T>& ptotal) {\n      //  return hmax(ptotal);\n      //}\n      template <class E, int NArrays>\n      void accumulate_active(Active<T>& total, const E& rhs, \n\t\t\t     ExpressionSize<NArrays>& loc) {\n\t// The following is not optimal since if a maximum is found\n\t// then the value is evaluated twice. 
Better would be to\n\tlocate the maximum in the entire array, then do the active\n\tstuff just for that element.\n\tif (rhs.value_at_location(loc) > total.value()) {\n\t  // The right hand side puts itself on the operation stack,\n\t  // while operator= puts the left hand side on the statement\n\t  // stack.\n\t  total = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc);\n\t}\n\telse {\n\t  rhs.advance_location(loc);\n\t}\n      }\n      template <class X>\n      void finish(X& total, const Index& n) { }\n      void finish_active(Active<T>& total, const Index& n) { }\n    };\n\n    // MinVal enables the \"minval\" function that returns the minimum value\n    template <typename T>\n    struct MinVal {\n      typedef T total_type;\n      static const int extra_element_cost = 0;\n      static const bool finish_needed = false;\n      static const bool active_finish_needed = false;\n      const char* name() { return \"minval\"; }\n      T first_value() { return internal::numeric_limits<T>::max_inf(); }\n#ifdef ADEPT_CXX11_FEATURES\n      void accumulate(T& total, const T& rhs) {\n\tusing std::fmin;\n\ttotal = fmin(total,rhs);\n      }\n      void accumulate_packet2(T& total, const Packet<T>& ptotal) {\n\tusing std::fmin;\n\ttotal = fmin(total,hmin(ptotal));\n      }\n#else\n      void accumulate(T& total, const T& rhs) {\n\tusing std::min;\n\ttotal = min(total,rhs);\n      }\n      void accumulate_packet2(T& total, const Packet<T>& ptotal) {\n\tusing std::min;\n\ttotal = min(total,hmin(ptotal));\n      }\n#endif\n      void accumulate(Packet<T>& total, const Packet<T>& rhs) { total = fmin(total,rhs); }\n      //T accumulate_packet(const Packet<T>& ptotal) {\n      //  return hmin(ptotal);\n      //}\n      template <class E, int NArrays>\n      void accumulate_active(Active<T>& total, const E& rhs, \n\t\t\t     ExpressionSize<NArrays>& loc) {\n\t// The following is not optimal since if a minimum is found\n\t// then the value is evaluated twice\n\tif 
(rhs.value_at_location(loc) < total.value()) {\n\t  // The right hand side puts itself on the operation stack,\n\t  // while operator= puts the left hand side on the statement\n\t  // stack.\n\t  total = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc);\n\t}\n\telse {\n\t  rhs.advance_location(loc);\n\t}\n      }\n      template <class X>\n      void finish(X& total, const Index& n) { }\n      void finish_active(Active<T>& total, const Index& n) { }\n    };\n  \n    // Norm2 enables the \"norm2\" function that returns the L-2 norm of\n    // its arguments, equal to sqrt(sum(rhs*rhs))\n    template <typename T>\n    struct Norm2 {\n      typedef T total_type;\n      static const int extra_element_cost = 0;\n      static const bool finish_needed = true;\n      static const bool active_finish_needed = true;\n      const char* name() { return \"norm2\"; }\n      T first_value() { return 0; }\n      template <typename E>\n      void accumulate(E& total, const E& rhs) { total += rhs*rhs; }\n      //T accumulate_packet(const Packet<T>& ptotal) {\n      //  return hsum(ptotal);\n      //}\n      // Note that ptotal is already an accumulation of squared\n      // values, so does not need to be squared again\n      template <typename E>\n      void accumulate_packet2(E& total, const Packet<T>& ptotal) {\n\ttotal += hsum(ptotal);\n      }\n      template <class E, int NArrays>\n      void accumulate_active(Active<T>& total, const E& rhs, \n\t\t\t     ExpressionSize<NArrays>& loc) {\n\t// Differentiate t += x*x -> dt += 2*x*dx.  
Use the \"special2\"\n\t// version of the following function, where multiplier*x*dx is\n\t// put on the operation stack.\n\tT xval = rhs.next_value_and_gradient_special2(*ADEPT_ACTIVE_STACK,\n\t\t\t\t\t\t      loc, 2.0);\n\t// Now do a purely inactive operation since we will put\n\t// \"total\" on the statement stack only right at the end\n\ttotal.lvalue() += xval*xval;\n      }\n      template <class X>\n      void finish(X& total, const Index& n) {\n\tusing std::sqrt;\n\ttotal = noalias(sqrt(total));\n      }\n      void finish_active(Active<T>& total, const Index& n) {\n\tusing std::sqrt;\n\t// The operation stack now contains the derivatives of all the\n\t// squared elements on the right hand side.  Here we complete\n\t// the differential statement by pushing the left hand side\n\t// onto the statement stack.\n\tADEPT_ACTIVE_STACK->push_lhs(total.gradient_index());\n\t// Since total is active it will do the right thing in the\n\t// final operation.\n\ttotal = noalias(sqrt(total));\n      }\n    };\n\n    // All enables the \"all\" function that returns \"true\" only if all\n    // the bool elements of the right hand side are true.  It would be\n    // faster if it could quit after finding the first \"false\".\n    struct All {\n      typedef bool total_type;\n      static const bool finish_needed = false;\n      const char* name() { return \"all\"; }\n      bool first_value() { return true; }\n      void accumulate(bool& total, const bool& rhs)\n      { total = total && rhs; }\n      template <class X>\n      void finish(X& total, const Index& n) { }\n    };\n\n    // Any enables the \"any\" function that returns \"true\" if any of\n    // the bool elements of the right hand side are true. 
It would be\n    // faster if it could quit after finding the first \"true\".\n    struct Any {\n      typedef bool total_type;\n      static const bool finish_needed = false;\n      const char* name() { return \"any\"; }\n      bool first_value() { return false; }\n      void accumulate(bool& total, const bool& rhs)\n      { total = total || rhs; }\n      template <class X>\n      void finish(X& total, const Index& n) { }\n    };\n\n    // Count enables the \"count\" function that returns the number of\n    // \"true\" elements in a bool array.\n    struct Count {\n      typedef Index total_type;\n      static const bool finish_needed = false;\n      const char* name() { return \"count\"; }\n      Index first_value() { return 0; }\n      void accumulate(Index& total, const bool& rhs)\n      { total += static_cast<Index>(rhs); } // true=1, false=0\n      template <class X>\n      void finish(X& total, const Index& n) { }\n    };\n\n    // -------------------------------------------------------------------\n    // Section 3. 
Various versions of the \"reduce\" function\n    // -------------------------------------------------------------------\n\n    // Reduce an entire inactive array, unvectorized\n    template <class Func, typename Type, class E>\n    inline\n    typename internal::enable_if<!(E::is_vectorizable\n\t\t\t &&Packet<Type>::is_vectorized\n\t\t\t &&is_same<Type,typename Func::total_type>::value),\n\t\t       typename Func::total_type>::type\n    reduce_inactive(const Expression<Type, E>& rhs) {\n      typename Func::total_type total;\n      Func f;\n      ExpressionSize<E::rank> dims;\n      // Check right hand side is a valid expression\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dims[0] == 0) {\n\t// Return zero if any of these functions applied to an empty\n\t// array\n\ttotal = 0;\n      }\n      else {\n\ttotal = f.first_value();\n\tIndex n = dims.size();\n\tExpressionSize<E::rank> i(0);\n\tExpressionSize<E::n_arrays> loc(0);\n\tint my_rank;\n\tstatic const int last = E::rank-1;\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, loc);\n\t  // Innermost loop\n\t  for ( ; i[last] < dims[last]; ++i[last]) {\n\t    f.accumulate(total, rhs.next_value(loc));\n\t  }\n\t  my_rank = E::rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dims[my_rank]) {\n\t      i[my_rank] = 0;\n\t    }\n\t    else {\n\t      break;\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\tf.finish(total, n);\n      }\n      return total;\n    }\n\n    // Reduce an entire inactive array, vectorized\n    template <class Func, typename Type, class E>\n    inline\n    typename internal::enable_if<E::is_vectorizable\n                       &&Packet<Type>::is_vectorized\n                       &&is_same<Type,typename Func::total_type>::value,\n\t\t       typename Func::total_type>::type\n    reduce_inactive(const Expression<Type, E>& 
rhs) {\n      typename Func::total_type total;\n      Func f;\n      ExpressionSize<E::rank> dims;\n      // Check right hand side is a valid expression\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dims[0] == 0) {\n\t// Return zero if any of these functions applied to an empty\n\t// array\n\ttotal = 0;\n      }\n      else if (dims[E::rank-1] >= Packet<Type>::size*2\n\t       && rhs.all_arrays_contiguous()) {\n\t// Vectorization is possible\n\tPacket<Type> ptotal(f.first_value());\n\tIndex n = dims.size();\n\tExpressionSize<E::rank> i(0);\n\tExpressionSize<E::n_arrays> loc(0);\n\tint my_rank;\n\tstatic const int last = E::rank-1;\n\tint iendvec;\n\tint istartvec = rhs.alignment_offset();\n\ttotal = f.first_value();\n\tif (istartvec < 0) {\n\t  istartvec = iendvec = 0;\n\t}\n\telse {\n\t  // Adjust iendvec such that iendvec-istartvec is a multiple\n\t  // of the packet size\n\t  iendvec = (dims[last]-istartvec);\n\t  iendvec -= (iendvec % Packet<Type>::size);\n\t  iendvec += istartvec;\n\t}\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, loc);\n\t  // Innermost loop\n\t  for ( ; i[last] < istartvec; ++i[last]) {\n\t    f.accumulate(total, rhs.next_value_contiguous(loc));\n\t  }\n\t  for ( ; i[last] < iendvec; i[last] += Packet<Type>::size) {\n\t    f.accumulate(ptotal, rhs.next_packet(loc));\n\t  }\n\t  for ( ; i[last] < dims[last]; ++i[last]) {\n\t    f.accumulate(total, rhs.next_value_contiguous(loc));\n\t  }\n\t  my_rank = E::rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dims[my_rank]) {\n\t      i[my_rank] = 0;\n\t    }\n\t    else {\n\t      break;\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\t// norm2 cannot use accumulate here or elements will be squared twice\n\t//f.accumulate(total, f.accumulate_packet(ptotal));\n\tf.accumulate_packet2(total, ptotal);\n\tf.finish(total, n);\n 
     }\n      else {\n\t// Back to unvectorized version\n\ttotal = f.first_value();\n\tIndex n = dims.size();\n\tExpressionSize<E::rank> i(0);\n\tExpressionSize<E::n_arrays> loc(0);\n\tint my_rank;\n\tstatic const int last = E::rank-1;\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, loc);\n\t  // Innermost loop\n\t  for ( ; i[last] < dims[last]; ++i[last]) {\n\t    f.accumulate(total, rhs.next_value(loc));\n\t  }\n\t  my_rank = E::rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dims[my_rank]) {\n\t      i[my_rank] = 0;\n\t    }\n\t    else {\n\t      break;\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\tf.finish(total, n);\n      }\n      return total;\n    }\n\n\n    // Reduce the specified dimension of an inactive array of rank > 1\n    template <class Func, typename Type, class E>\n    inline\n    void reduce_dimension(const Expression<Type, E>& rhs, int reduce_dim,\n\t\t    Array<E::rank-1,typename Func::total_type,false>& total) {\n      Func f;\n      ExpressionSize<E::rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dims[0] == 0) {\n\t// Return empty array if any of these functions applied to an\n\t// empty array\n\ttotal.clear();\n      }\n      else if (reduce_dim >= E::rank) {\n\tstd::stringstream s;\n\ts << \"In \" << f.name() << \"(Expression<rank=\"\n\t  << E::rank << \">,dim=\" << reduce_dim \n\t  << \"), dim must be less than rank.\";\n\tthrow invalid_dimension(s.str() ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\t// New array has the same dimensions as the input but with one\n\t// of the dimensions removed\n\tExpressionSize<E::rank-1> new_dims;\n\tint jnew = 0;\n\tfor (int j = 0; j < E::rank; ++j) {\n\t  if (j != reduce_dim) {\n\t    new_dims[jnew++] = dims[j];\n\t  }\n\t}\n\ttotal.resize(new_dims);\n\ttotal = f.first_value();\n\tExpressionSize<E::rank> 
i(0);\n\tExpressionSize<E::rank-1> inew(0);\n\tExpressionSize<E::n_arrays> loc(0);\n\tint my_rank;\n\tstatic const int last = E::rank-1;\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, loc);\n\t  // Innermost loop. Note that indexing of total with inew is\n\t  // not very efficient for high-rank arrays since the\n\t  // location must be computed from all dimensions each time.\n\t  if (reduce_dim == last) {\n\t    for ( ; i[last] < dims[last]; ++i[last]) {\n\t      f.accumulate(total.get_lvalue(inew), rhs.next_value(loc));\n\t    }\n\t  }\n\t  else {\n\t    for ( inew[last-1] = 0; i[last] < dims[last]; \n\t\t ++i[last], ++inew[last-1]) {\n\t      f.accumulate(total.get_lvalue(inew), rhs.next_value(loc));\n\t    }\n\t  }\n\t  // Advancing to next innermost loop is somewhat involved\n\t  // since we have to do something different when we reach the\n\t  // dimension that is being reduced\n\t  my_rank = E::rank-1;\n\t  while (--my_rank >= 0) {\n\t    ++i[my_rank];\n\t    if (my_rank < reduce_dim) {\n\t      ++inew[my_rank];\n\t      if (i[my_rank] >= dims[my_rank]) {\n\t\ti[my_rank] = 0;\n\t\tinew[my_rank] = 0;\n\t      }\n\t      else {\n\t\tbreak;\n\t      }   \n\t    }\n\t    else if (my_rank == reduce_dim) {\n\t      if (i[my_rank] >= dims[my_rank]) {\n\t\ti[my_rank] = 0;\n\t      }\n\t      else {\n\t\tbreak;\n\t      }   \n\t    }\n\t    // The following could be a simple \"else\", but sometimes\n\t    // the compiler optimizes to the extent that it thinks\n\t    // inew[-1] will be accessed (even though it won't),\n\t    // leading to a warning about the array subscript being\n\t    // out of bounds. 
Here the compiler knows the index must\n\t    // be zero or positive.\n\t    else if (my_rank > 0) {\n\t      ++inew[my_rank-1];\n\t      if (i[my_rank] >= dims[my_rank]) {\n\t\ti[my_rank] = 0;\n\t\tinew[my_rank-1] = 0;\n\t      }\n\t      else {\n\t\tbreak;\n\t      }\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\t\n\tif (f.finish_needed) {\n\t  f.finish(total, dims[reduce_dim]);\n\t}\n      }\n    }\n\n    // Reduce the entirety of an active array\n    template <class Func, typename Type, class E>\n    inline\n    void reduce_active(const Expression<Type, E>& rhs, Active<Type>& total) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\ttotal.lvalue() = reduce_inactive<Func>(rhs);\n\treturn;\n      }\n#endif\n\n      Func f;\n      ExpressionSize<E::rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dims[0] == 0) {\n\t// Return zero if any of these functions applied to an empty\n\t// array\n\ttotal = 0;\n      }\n      else {\n\ttotal.set_value(f.first_value());\n\tIndex n = dims.size();\n\tExpressionSize<E::rank> i(0);\n\tExpressionSize<E::n_arrays> loc(0);\n\tint my_rank;\n\tstatic const int last = E::rank-1;\n\t// Check there is enough space on the operation stack by\n\t// working out the cost of all the elements of the array. 
Note\n\t// that the final operation to compute the total at the end is\n\t// dealt with separately.\n\tADEPT_ACTIVE_STACK->check_space((E::n_active + Func::extra_element_cost) * n);\n\tdo {\n\t  i[last] = 0;\n\t  rhs.set_location(i, loc);\n\t  // Innermost loop\n\t  for ( ; i[last] < dims[last]; ++i[last]) {\n\t    f.accumulate_active(total, rhs, loc);\n\t  }\n\t  my_rank = E::rank-1;\n\t  while (--my_rank >= 0) {\n\t    if (++i[my_rank] >= dims[my_rank]) {\n\t      i[my_rank] = 0;\n\t    }\n\t    else {\n\t      break;\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n\tif (f.active_finish_needed) {\n\t  f.finish_active(total, n);\n\t}\n      }\n    }\n\n    // Reduce the specified dimension of an active array of rank > 1\n    template <class Func, typename Type, class E>\n    inline\n    void reduce_dimension(const Expression<Type, E>& rhs, int reduce_dim,\n\t\tArray<E::rank-1,Type,true>& result) {\n#ifdef ADEPT_RECORDING_PAUSABLE\n      if (!ADEPT_ACTIVE_STACK->is_recording()) {\n\t// This solution requires more shallow copies than are really\n\t// needed; could be made more efficient if Array had a member\n\t// function to link an pre-constructed active Array to\n\t// inactive data.\n\tArray<E::rank-1,Type,false> result_inactive;\n\treduce_dimension<Func>(rhs, reduce_dim, result_inactive);\n\tArray<E::rank-1,Type,true> result_active(result_inactive.data(),\n\t\t\t\t\t\t result_inactive.storage(),\n\t\t\t\t\t\t result_inactive.dimensions(),\n\t\t\t\t\t\t result_inactive.offset());\n\tresult >>= result_active;\n\treturn;\n      }\n#endif\n\n      Func f;\n      ExpressionSize<E::rank> dims;\n      if (!rhs.get_dimensions(dims)) {\n\tstd::string str = \"Array size mismatch in \"\n\t  + rhs.expression_string() + \".\";\n\tthrow size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n      }\n      else if (dims[0] == 0) {\n\t// Return empty array if any of these functions applied to an\n\t// empty array\n\tresult.clear();\n      }\n      else if (reduce_dim >= E::rank) 
{\n\tstd::stringstream s;\n\ts << \"In \" << f.name() << \"(Expression<rank=\"\n\t  << E::rank << \">,dim=\" << reduce_dim \n\t  << \"), dim must be less than rank.\";\n\tthrow invalid_dimension(s.str() ADEPT_EXCEPTION_LOCATION);\n      }\n      else {\n\t// New array has the same dimensions as the input but with one\n\t// of the dimensions removed\n\tExpressionSize<E::rank-1> new_dims;\n\tint jnew = 0;\n\tfor (int j = 0; j < E::rank; ++j) {\n\t  if (j != reduce_dim) {\n\t    new_dims[jnew++] = dims[j];\n\t  }\n\t}\n\tresult.resize(new_dims);\n\tExpressionSize<E::rank> i(0);\n\tExpressionSize<E::rank-1> inew(0);\n\tExpressionSize<E::n_arrays> loc(0);\n\tint my_rank;\n\tActive<Type> total;\n\tIndex n = dims.size();\n\t// Check there is enough space on the operation stack,\n\t// including the per-element cost, and an additional cost to\n\t// finalize each individual strip of the array. Even though an\n\t// additional check is performed at the end of each completed\n\t// strip, the total number needs to be anticipated beforehand\n\t// (omitting this can cause memory corruption).\n\tADEPT_ACTIVE_STACK->check_space((E::n_active + Func::extra_element_cost) * n + new_dims.size());\n\tdo {\n\t  i[reduce_dim] = 0;\n\t  //\t  total.set_value(f.first_value());\n\t  total = f.first_value();\n\n\t  // Innermost loop. 
Note that indexing of total with inew is\n\t  // not very efficient for high-rank arrays since the\n\t  // location must be computed from all dimensions each time.\n\t  for ( ; i[reduce_dim] < dims[reduce_dim]; ++i[reduce_dim]) {\n\t    rhs.set_location(i, loc);\n\t    f.accumulate_active(total, rhs, loc);\n\t  }\n\t  if (f.active_finish_needed) {\n\t    f.finish_active(total, dims[reduce_dim]);\n\t  }\n\t  result.get_lvalue(inew) = total;\n\t  my_rank = E::rank;\n\t  while (--my_rank >= 0) {\n\t    if (my_rank == reduce_dim) {\n\t      continue;\n\t    }\n\t    ++i[my_rank];\n\t    if (my_rank < reduce_dim) {\n\t      ++inew[my_rank];\n\t      if (i[my_rank] >= dims[my_rank]) {\n\t\ti[my_rank] = 0;\n\t\tinew[my_rank] = 0;\n\t      }\n\t      else {\n\t\tbreak;\n\t      }   \n\t    }\n\t    else if (my_rank == reduce_dim) {\n\t      if (i[my_rank] >= dims[my_rank]) {\n\t\ti[my_rank] = 0;\n\t      }\n\t      else {\n\t\tbreak;\n\t      }   \n\t    }\n\t    else {\n\t      ++inew[my_rank-1];\n\t      if (i[my_rank] >= dims[my_rank]) {\n\t\ti[my_rank] = 0;\n\t\tinew[my_rank-1] = 0;\n\t      }\n\t      else {\n\t\tbreak;\n\t      }\n\t    }\n\t  }\n\t} while (my_rank >= 0);\n      }\n    }\n\n  }\n\n\n  // -------------------------------------------------------------------\n  // Section 4. 
Implement the functions\n  // -------------------------------------------------------------------\n\n  // Implement sum(x), sum(x,dim), mean(x), mean(x,dim) etc.\n  // Different versions of the \"reduce\" function are called depending\n  // on whether \"x\" is active and whether \"dim\" is present.\n\n#define DEFINE_REDUCE_FUNCTION(NAME, CLASSNAME)\t\t\\\n  /* function(inactive) */\t\t\t\t\\\n  template <typename Type, class E>\t\t\t\\\n  inline\t\t\t\t\t\t\\\n  typename internal::enable_if<!E::is_active && E::rank != 0,\t\\\n\t\t     Type>::type\t\t\t\\\n  NAME(const Expression<Type, E>& rhs) {\t\t\\\n    return internal::reduce_inactive<internal:: CLASSNAME<Type> >(rhs);\t\\\n  }\t\t\t\t\t\t\t\\\n  \t\t\t\t\t\t\t\\\n  /* function(active) */\t\t\t\t\\\n  template <typename Type, class E>\t\t\t\\\n  inline\t\t\t\t\t\t\\\n  typename internal::enable_if<E::is_active && E::rank != 0,\t\\\n\t\t     Active<Type> >::type\t\t\\\n  NAME(const Expression<Type, E>& rhs) {\t\t\\\n    Active<Type> result;\t\t\t\t\\\n    internal::reduce_active<internal:: CLASSNAME<Type> >(rhs, result);\t\\\n    return result;\t\t\t\t\t\\\n  }\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\\\n  /* function(active[rank=1], dim) */\t\t\t\\\n  template <typename Type, class E>\t\t\t\\\n  inline\t\t\t\t\t\t\\\n  typename internal::enable_if<!E::is_active && E::rank == 1,\t\\\n\t\t\t\t     Type>::type\t\\\n  NAME(const Expression<Type, E>& rhs, int dim) {\t\\\n    if (dim != 0) {\t\t\t\t\t\\\n      throw invalid_dimension(\"Two-argument reduce function applied to vector must have zero as second argument\" \\\n\t\t\t      ADEPT_EXCEPTION_LOCATION);\t\t\\\n    }\t\t\t\t\t\t\t\\\n    return internal::reduce_inactive<internal:: CLASSNAME<Type> >(rhs);\t\\\n  }\t\t\t\t\t\t\t\\\n  \t\t\t\t\t\t\t\\\n  /* function(active[rank=1], dim) */\t\t\t\\\n  template <typename Type, class E>\t\t\t\\\n  inline\t\t\t\t\t\t\\\n  typename internal::enable_if<E::is_active && E::rank == 1,\t\\\n\t\t     Active<Type> >::type\t\t\\\n  
NAME(const Expression<Type, E>& rhs, int dim) {\t\\\n    if (dim != 0) {\t\t\t\t\t\\\n      throw invalid_dimension(\"Two-argument reduce function applied to vector must have zero as second argument\" \\\n\t\t\t    ADEPT_EXCEPTION_LOCATION);\t\t\t\\\n    }\t\t\t\t\t\t\t\\\n    Active<Type> result;\t\t\t\t\\\n    internal::reduce_active<internal:: CLASSNAME<Type> >(rhs, result);\t\\\n    return result;\t\t\t\t\t\\\n  }\t\t\t\t\t\t\t\\\n\t\t\t\t\t\t\t\\\n  /* function(inactive[rank>1], dim) */\t\t\t\\\n  /* function(active[rank>1], dim) */\t\t\t\\\n  template <typename Type, class E>\t\t\t\\\n  inline\t\t\t\t\t\t\\\n  typename internal::enable_if<(E::rank > 1),\t\t\\\n\t     Array<E::rank-1,Type,E::is_active> >::type\t\\\n  NAME(const Expression<Type, E>& rhs, int dim) {\t\\\n    Array<E::rank-1,Type,E::is_active> result;\t\t\\\n    internal::reduce_dimension<internal:: CLASSNAME<Type> >(rhs, dim, result); \\\n    return result;\t\t\t\t\t\\\n  }\n\n  DEFINE_REDUCE_FUNCTION(sum, Sum)\n  DEFINE_REDUCE_FUNCTION(mean, Mean)\n  DEFINE_REDUCE_FUNCTION(product, Product)\n  DEFINE_REDUCE_FUNCTION(maxval, MaxVal)\n  DEFINE_REDUCE_FUNCTION(minval, MinVal)\n  DEFINE_REDUCE_FUNCTION(norm2, Norm2)\n\n#undef DEFINE_REDUCE_FUNCTION\n\n\n  // Implement all(x), all(x,dim), any(x) and any(x,dim).  
Fewer\n  // possibilities this time as no active versions.\n\n#define DEFINE_BOOL_REDUCE_FUNCTION(NAME, CLASSNAME)\t \\\n  template <class E>\t\t\t\t\t \\\n  inline bool NAME(const Expression<bool, E>& rhs)\t \\\n  { return internal::reduce_inactive<internal:: CLASSNAME>(rhs); }\t\\\n  \t\t\t\t\t\t\t \\\n  template <class E>\t\t\t\t\t \\\n  inline\t\t\t\t\t\t \\\n  Array<E::rank-1,bool,false>\t\t\t\t \\\n  NAME(const Expression<bool, E>& rhs, int dim) {\t \\\n    Array<E::rank-1,bool,false> result;\t\t\t \\\n    internal::reduce_dimension<internal:: CLASSNAME>(rhs, dim, result);\t\\\n    return result;\t\t\t\t\t \\\n  }\n\n  DEFINE_BOOL_REDUCE_FUNCTION(all, All)\n  DEFINE_BOOL_REDUCE_FUNCTION(any, Any)\n#undef DEFINE_BOOL_REDUCE_FUNCTION\n\n  // count(x) and count(x,dim) is slightly different as it returns\n  // Index\n  template <class E>\n  inline Index count(const Expression<bool, E>& rhs)\n  { return internal::reduce_inactive<internal::Count>(rhs); }\n\n  template <class E>\n  inline Array<E::rank-1,Index,false>\n  count(const Expression<bool, E>& rhs, int dim) {\n    Array<E::rank-1,Index,false> result;\n    internal::reduce_dimension<internal::Count>(rhs, dim, result);\n    return result;\n  }\n\n\n  // -------------------------------------------------------------------\n  // Section 5. diag_vector\n  // -------------------------------------------------------------------\n\n  // diag_vector(A,offdiag), where A is a 2D array, returns the\n  // diagonal indexed by \"offdiag\" as a 1D array pointing to the\n  // original data, or the main diagonal if offdiag is missing. 
Can be\n  // used as an lvalue.\n  template <typename Type, bool IsActive>\n  Array<1,Type,IsActive>\n  diag_vector(Array<2,Type,IsActive>& A, Index offdiag = 0) {\n    ExpressionSize<2> dims = A.dimensions();\n    ExpressionSize<2> offset = A.offset();\n    ExpressionSize<1> new_dim, new_offset;\n    new_offset[0] = offset[0]+offset[1];\n    if (offdiag >= 0) {\n      new_dim[0] = std::min(dims[0], dims[1]-offdiag);\n      return Array<1,Type,IsActive>(A.data()+offdiag*offset[1],\n\t\t\t\t    A.storage(), new_dim, new_offset);\n    }\n    else {\n      new_dim[0] = std::min(dims[0]+offdiag, dims[1]);\n      return Array<1,Type,IsActive>(A.data()-offdiag*offset[0],\n\t\t\t\t    A.storage(), new_dim, new_offset);\n    }\n  }\n\n  // diag_vector(A,offdiag), where A is a 2D expression, returns the\n  // diagonal indexed by \"offdiag\" as a 1D array, or the main diagonal\n  // if offidag is missing. Cannot be used as an lvalue.\n  template <typename Type, class E>\n  typename internal::enable_if<E::rank == 2 && !E::is_active,\n\t\t\t       Array<1,Type,E::is_active> >::type\n  diag_vector(const Expression<Type,E>& arg, Index offdiag = 0) {\n    ExpressionSize<2> dims;\n    if (!arg.get_dimensions(dims)) {\n      std::string str;\n      str += \"Array size mismatch in \";\n      str += arg.expression_string();\n      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n    }\n\n    ExpressionSize<2> i;\n    ExpressionSize<E::n_arrays> ind;\n    if (offdiag >= 0) {\n      Index new_dim = std::min(dims[0], dims[1]-offdiag);\n      Array<1,Type,E::is_active> v(new_dim);\n      for (int j = 0; j < new_dim; ++j) {\n\ti[0] = j;\n\ti[1] = j+offdiag;\n\targ.set_location(i, ind);\n\tv(j) = arg.next_value(ind);\n      }\n      return v;\n    }\n    else {\n      Index new_dim = std::min(dims[0]+offdiag, dims[1]);\n      Array<1,Type,E::is_active> v(new_dim);\n      for (int j = 0; j < new_dim; ++j) {\n\ti[0] = j;\n\ti[1] = j+offdiag;\n\targ.set_location(i, ind);\n\tv(j) = 
arg.next_value(ind);\n      }\n      return v;\n    }\n  }\n  template <typename Type, class E>\n  typename internal::enable_if<E::rank == 2 && E::is_active,\n\t\t\t       Array<1,Type,E::is_active> >::type\n  diag_vector(const Expression<Type,E>& arg, Index offdiag = 0) {\n    ExpressionSize<2> dims;\n    if (!arg.get_dimensions(dims)) {\n      std::string str;\n      str += \"Array size mismatch in \";\n      str += arg.expression_string();\n      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n    }\n\n    ExpressionSize<2> i;\n    ExpressionSize<E::n_arrays> ind;\n    if (offdiag >= 0) {\n      Index new_dim = std::min(dims[0], dims[1]-offdiag);\n      Array<1,Type,E::is_active> v(new_dim);\n      for (int j = 0; j < new_dim; ++j) {\n\ti[0] = j;\n\ti[1] = j+offdiag;\n\targ.set_location(i, ind);\n\tv.data()[j] = arg.next_value_and_gradient(*ADEPT_ACTIVE_STACK,ind);\n\tADEPT_ACTIVE_STACK->push_lhs(v.gradient_index()+j);\n      }\n      return v;\n    }\n    else {\n      Index new_dim = std::min(dims[0]+offdiag, dims[1]);\n      Array<1,Type,E::is_active> v(new_dim);\n      for (int j = 0; j < new_dim; ++j) {\n\ti[0] = j;\n\ti[1] = j+offdiag;\n\targ.set_location(i, ind);\n\tv.data()[j] = arg.next_value_and_gradient(*ADEPT_ACTIVE_STACK,ind);\n\tADEPT_ACTIVE_STACK->push_lhs(v.gradient_index()+j);\n      }\n      return v;\n    }\n  }\n\n  // diag_matrix(v,offdiag), where v is a 1D expression, returns a\n  // DiagMatrix whose diagonal is a copy of v. Cannot be used as an\n  // lvalue.\n  template <typename Type, class E>\n  typename internal::enable_if<E::rank == 1,\n       SpecialMatrix<Type, internal::BandEngine<ROW_MAJOR,0,0>,\n\t\t    E::is_active> >::type\n  diag_matrix(const Expression<Type,E>& arg) {\n    Array<1,Type,E::is_active> v = arg;\n    return v.diag_matrix();\n  }\n\n  // -------------------------------------------------------------------\n  // Section 6. 
dot_product\n  // -------------------------------------------------------------------\n  template <typename LType, typename RType, class L, class R>\n  typename internal::enable_if<L::rank == 1 && R::rank == 1,\n\t     typename internal::active_scalar<typename internal::promote<LType,RType>::type,\n\t\t\t\t     L::is_active || R::is_active>::type>::type\n  dot_product(const Expression<LType,L>& l,\n\t      const Expression<RType,R>& r) {\n    return sum(l*r);\n  }\n\n  // -------------------------------------------------------------------\n  // Section 7. minloc\n  // -------------------------------------------------------------------\n\n  template <typename Type, class E>\n  inline\n  typename internal::enable_if<E::rank == 1, Index>::type\n  minloc(const Expression<Type, E>& rhs) {\n    ExpressionSize<1> length;\n    // Check the argument of the function is a valid expression\n    if (!rhs.get_dimensions(length)) {\n      std::string str = \"Array size mismatch in \"\n\t+ rhs.expression_string() + \".\";\n      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n    }\n    // Length of the rank-1 expression\n    Index& len = length[0];\n    Type running_min  = internal::numeric_limits<Type>::max_inf();\n    Index running_loc = 0;\n    ExpressionSize<1> coords(0);\n    ExpressionSize<E::n_arrays> loc;\n    rhs.set_location(coords, loc);\n    // Loop over all values in the expression\n    for (Index i = 0; i < len; i++) {\n      Type val = rhs.next_value(loc);\n      if (val < running_min) {\n\trunning_min = val;\n\trunning_loc = i;\n      }\n    }\n    return running_loc;\n  }\n\n  // -------------------------------------------------------------------\n  // Section 8. 
maxloc\n  // -------------------------------------------------------------------\n\n  template <typename Type, class E>\n  inline\n  typename internal::enable_if<E::rank == 1, Index>::type\n  maxloc(const Expression<Type, E>& rhs) {\n    ExpressionSize<1> length;\n    // Check the argument of the function is a valid expression\n    if (!rhs.get_dimensions(length)) {\n      std::string str = \"Array size mismatch in \"\n\t+ rhs.expression_string() + \".\";\n      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);\n    }\n    // Length of the rank-1 expression\n    Index& len = length[0];\n    Type running_max  = internal::numeric_limits<Type>::min_inf();\n    Index running_loc = 0;\n    ExpressionSize<1> coords(0);\n    ExpressionSize<E::n_arrays> loc;\n    rhs.set_location(coords, loc);\n    // Loop over all values in the expression\n    for (Index i = 0; i < len; i++) {\n      Type val = rhs.next_value(loc);\n      if (val > running_max) {\n\trunning_max = val;\n\trunning_loc = i;\n      }\n    }\n    return running_loc;\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/scalar_shortcuts.h",
    "content": "/* scalar_shortcuts.h -- Definitions of \"shortcut\" typedefs for scalar types\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n\n#ifndef AdeptScalarShortcuts_H\n#define AdeptScalarShortcuts_H\n\n#include <complex>\n\n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n// First the case when automatic differentiation is ON\n\n#include <adept/Active.h>\n\nnamespace adept {\n\n  typedef Active<Real> aReal;\n  typedef Active<float> afloat;\n  typedef Active<double> adouble;\n\n  typedef Active<std::complex<Real> > aComplex;\n  typedef Active<std::complex<float> > aComplexFloat;\n  typedef Active<std::complex<double> > aComplexDouble;\n\n  inline Real value(Real x) { return x; }\n\n} // End namespace adept\n\n\n#else\n// Second the case when automatic differentiation is OFF\n\n#include <adept/base.h>\n\nnamespace adept {\n\n  typedef Real aReal;\n  typedef float afloat;\n  typedef double adouble;\n\n  typedef std::complex<Real> aComplex;\n  typedef std::complex<float> aComplexFloat;\n  typedef std::complex<double> aComplexDouble;\n\n  // Normally value(x) returns the inactive part of x, so if x is\n  // inactive we simply return a constant reference to x\n  template <typename T>\n  inline const T& value(const T& x) { return x; }\n\n  inline Real value(Real x) { return x; }\n\n} // End namespace adept\n\n#endif\n\n#endif\n"
  },
  {
    "path": "include/adept/settings.h",
    "content": "/* settings.h -- View/change the overall Adept settings\n\n    Copyright (C) 2016-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptSettings_H\n#define AdeptSettings_H 1\n\n#include <string>\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Get compiler settings\n  // -------------------------------------------------------------------\n\n  // Return the version of Adept at compile time\n  std::string version();\n\n  // Return the compiler used to compile the Adept library (e.g. \"g++ 4.3.2\")\n  std::string compiler_version();\n\n  // Return the compiler flags used when compiling the Adept library\n  // (e.g. \"-Wall -g -O3\")\n  std::string compiler_flags();\n  \n  // Return a multi-line string listing numerous aspects of the way\n  // Adept has been configured.\n  std::string configuration();\n\n  // Was the library compiled with matrix multiplication support (from\n  // BLAS)?\n  bool have_matrix_multiplication();\n\n  // Was the library compiled with linear algebra support (e.g. inv\n  // and solve from LAPACK)\n  bool have_linear_algebra();\n\n  // -------------------------------------------------------------------\n  // Get/set number of threads for array operations\n  // -------------------------------------------------------------------\n\n  // Get the maximum number of threads available for BLAS operations\n  int max_blas_threads();\n\n  // Set the maximum number of threads available for BLAS operations\n  // (zero means use the maximum sensible number on the current\n  // system), and return the number actually set.  
Note that OpenBLAS\n  // uses pthreads and the Jacobian calculation uses OpenMP - this can\n  // lead to inefficient behaviour so if you are computing Jacobians\n  // then you may get better performance by setting the number of\n  // array threads to one.\n  int set_max_blas_threads(int n);\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept/solve.h",
    "content": "/* solve.h -- Solve systems of linear equations\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                             \n#ifndef AdeptSolve_H\n#define AdeptSolve_H 1\n\n#include <vector>\n\n#include <adept/Array.h>\n#include <adept/SpecialMatrix.h>\n\nnamespace adept {\n\n  // -------------------------------------------------------------------\n  // Solve Ax = b for general square matrix A\n  // -------------------------------------------------------------------\n  template <typename T>\n  Array<1,T,false> \n  solve(const Array<2,T,false>& A, const Array<1,T,false>& b);\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for general square matrix A and rectangular matrix B\n  // -------------------------------------------------------------------\n  template <typename T>\n  Array<2,T,false> \n  solve(const Array<2,T,false>& A, const Array<2,T,false>& B);\n\n  // -------------------------------------------------------------------\n  // Solve Ax = b for symmetric square matrix A\n  // -------------------------------------------------------------------\n  template <typename T, SymmMatrixOrientation Orient>\n  Array<1,T,false>\n  solve(const SpecialMatrix<T,internal::SymmEngine<Orient>,false>& A,\n\tconst Array<1,T,false>& b);\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for symmetric square matrix A\n  // -------------------------------------------------------------------\n  template <typename T, SymmMatrixOrientation Orient>\n  Array<2,T,false>\n  solve(const SpecialMatrix<T,internal::SymmEngine<Orient>,false>& A,\n\tconst Array<2,T,false>& B);\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for symmetric square matrices A and B\n  // 
-------------------------------------------------------------------\n  // Simply copy B into a general dense matrix\n  template <typename T, SymmMatrixOrientation LOrient,\n    SymmMatrixOrientation ROrient>\n  inline\n  Array<2,T,false>\n  solve(const SpecialMatrix<T,internal::SymmEngine<LOrient>,false>& A,\n\tconst SpecialMatrix<T,internal::SymmEngine<ROrient>,false>& B) {\n    Array<2,T,false> B_array = B;\n    return solve(A,B_array);\n  }\n\n  // -------------------------------------------------------------------\n  // Solve Ax = b for general expressions\n  // -------------------------------------------------------------------\n  template <typename LType, class L, typename RType, class R>\n  typename internal::enable_if<L::rank==2 && R::rank==1\n\t\t\t       && !L::is_active && !R::is_active\n\t\t\t       && internal::matrix_op_defined<LType>::value\n\t\t\t       && internal::matrix_op_defined<RType>::value,\n\t\t\t       Array<1,typename internal::promote<LType,RType>::type,false> >::type\n  solve(const Expression<LType,L>& l, const Expression<RType,R>& r) {\n    typedef typename internal::promote<LType,RType>::type PType;\n    Array<2,PType,false> left = l.cast();\n    Array<1,PType,false> right = r.cast();\n    return solve(left,right);\n  }\n\n  // -------------------------------------------------------------------\n  // Solve AX = B for general expressions\n  // -------------------------------------------------------------------\n  template <typename LType, class L, typename RType, class R>\n  typename internal::enable_if<L::rank==2 && R::rank==2\n\t\t\t       && !L::is_active && !R::is_active\n\t\t\t       && internal::matrix_op_defined<LType>::value\n\t\t\t       && internal::matrix_op_defined<RType>::value,\n\t\t\t       Array<2,typename internal::promote<LType,RType>::type,false> >::type\n  solve(const Expression<LType,L>& l, const Expression<RType,R>& r) {\n    typedef typename internal::promote<LType,RType>::type PType;\n    Array<2,PType,false> 
left = l.cast();\n    Array<2,PType,false> right = r.cast();\n    return solve(left,right);\n  } \n}\n\n#endif\n"
  },
  {
    "path": "include/adept/spread.h",
    "content": "/* spread.h -- Spread an array into an additional dimension\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n*/\n                   \n#ifndef AdeptSpread_H\n#define AdeptSpread_H\n\n#include <adept/Array.h>\n\nnamespace adept {\n\n  namespace internal {\n    \n    // Expression representing the spread of an array into an\n    // additional dimension\n    template <int SpreadDim, typename Type, class E>\n    class Spread : public Expression<Type, Spread<SpreadDim,Type,E> > {\n      typedef Array<E::rank,Type,E::is_active> ArrayType;\n\n    public:\n      // Static data\n      static const int  rank       = E::rank+1;\n      static const bool is_active  = E::is_active;\n      static const int  n_active   = ArrayType::n_active;\n      static const int  n_scratch  = 0;\n      static const int  n_arrays   = ArrayType::n_arrays;\n      // Currently not vectorizable if the final dimension is the\n      // spread dimension because the current design always has the\n      // array index increasing\n      static const bool is_vectorizable = (SpreadDim != E::rank);\n\n    protected:\n      const ArrayType array;\n      ExpressionSize<rank> dims;\n      Index n;\n\n    public:\n      Spread(const Expression<Type,E>& e, Index n_)\n\t: array(e.cast()), n(n_) {\n\tfor (int i = 0; i < SpreadDim; ++i) {\n\t  dims[i] = array.dimension(i);\n\t}\n\tdims[SpreadDim] = n_;\n\tfor (int i = SpreadDim+1; i < rank; ++i) {\n\t  dims[i] = array.dimension(i-1);\n\t}\n\t// Communicate empty array if n == 0\n\tif (n_ == 0) {\n\t  dims[0] = 0;\n\t}\n      }\n\n      bool get_dimensions_(ExpressionSize<rank>& dim) const {\n\tdim = dims;\n\treturn true;\n      }\n\n      std::string expression_string_() const {\n\tstd::stringstream s;\n\ts << \"spread<\" << SpreadDim << \">(\" << array.expression_string()\n\t  << \",\" << n << \")\";\n\treturn s.str();\n      
}\n\n      bool is_aliased_(const Type* mem1, const Type* mem2) const {\n\treturn false;\n      }\n\n      bool all_arrays_contiguous_() const {\n\treturn array.all_arrays_contiguous_();\n      }\n\n      bool is_aligned_() const {\n\treturn array.is_aligned_();\n      }\n     \n      template <int N>\n      int alignment_offset_() const {\n\treturn array.template alignment_offset_<N>();\n      }\n\n      // Do not implement value_with_len_\n\n      // Advance only if the spread dimension is not the last\n      template <int MyArrayNum, int NArrays>\n      void advance_location_(ExpressionSize<NArrays>& loc) const {\n\t// If false this if statement should be optimized away\n\tif (SpreadDim < rank-1) {\n\t  array.template advance_location_<MyArrayNum>(loc);\n\t}\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Type value_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn array.template value_at_location_<MyArrayNum>(loc);\n      }\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_at_location_store_(const ExpressionSize<NArrays>& loc,\n\t\t\t\t    ScratchVector<NScratch>& scratch) const {\n\treturn array.template value_at_location_<MyArrayNum>(loc);\n      }\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      Type value_stored_(const ExpressionSize<NArrays>& loc,\n\t\t\t const ScratchVector<NScratch>& scratch) const {\n\treturn array.template value_at_location_<MyArrayNum>(loc);\n      }\n\n      template <int MyArrayNum, int NArrays>\n      Packet<Type> \n      packet_at_location_(const ExpressionSize<NArrays>& loc) const {\n\treturn packet_at_location_local_<SpreadDim==rank-1,MyArrayNum>(loc);\n\n      }\n\n    protected:\n\n      // Specializing for the case when the final dimension is the\n      // final dimension of the wrapped array\n      template <bool IsDuplicate, int MyArrayNum, int NArrays>\n      typename enable_if<!IsDuplicate, Packet<Type> 
>::type\n      packet_at_location_local_(const ExpressionSize<NArrays>& loc) const {\n\treturn array.template packet_at_location_<MyArrayNum>(loc);\n      }\n\n      // Specializing for the case when the final dimension is to be\n      // \"spread\".  The following does not work because the array\n      // location is incremented for packets when we really want it to\n      // always point to the start of a row.  It is deactivated by\n      // is_vectorizable_ (above).\n      template <bool IsDuplicate, int MyArrayNum, int NArrays>\n      typename enable_if<IsDuplicate, Packet<Type> >::type\n      packet_at_location_local_(const ExpressionSize<NArrays>& loc) const {\n\treturn Packet<Type>(array.template value_at_location_<MyArrayNum>(loc));\n      }\n      \n    public:\n\n      template <int MyArrayNum, int NArrays>\n      void set_location_(const ExpressionSize<rank>& i, \n\t\t\t ExpressionSize<NArrays>& index) const {\n\tExpressionSize<rank-1> i_array(0);\n\tint j = 0;\n\tfor ( ; j < SpreadDim; ++j) {\n\t  i_array[j] = i[j];\n\t}\n\tfor ( ; j < rank-1; ++j) {\n\t  i_array[j] = i[j+1];\n\t}\n\tarray.template set_location_<MyArrayNum>(i_array, index);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch>\n      void calc_gradient_(Stack& stack, const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch) const {\n\tarray.template calc_gradient_<MyArrayNum,MyScratchNum>(stack,loc,scratch);\n      }\n\n      template <int MyArrayNum, int MyScratchNum, int NArrays, int NScratch,\n\t\ttypename MyType>\n      void calc_gradient_(Stack& stack, \n\t\t\t  const ExpressionSize<NArrays>& loc,\n\t\t\t  const ScratchVector<NScratch>& scratch,\n\t\t\t  MyType multiplier) const {\n\tarray.template calc_gradient_<MyArrayNum,MyScratchNum>(stack,loc,\n\t\t\t\t\t\t      scratch,multiplier);\n      }\n\n\n    };\n    \n      \n  }\n\n  // Define spread function applied to an expression\n  template <int SpreadDim, typename 
Type, class E>\n  typename internal::enable_if<(SpreadDim >= 0 && SpreadDim <= E::rank),\n\t       internal::Spread<SpreadDim,Type,E> >::type\n  spread(const Expression<Type,E>& e, Index n) {\n    return internal::Spread<SpreadDim,Type,E>(e,n);\n  }\n\n  /*\n  // If \"spread\" is applied to a scalar, we expand it to a Vector of\n  // the same type\n  template <int SpreadDim, typename Type>\n  typename internal::enable_if<internal::is_not_expression<Type>::value,\n\t\t\t       Array<1,Type,false> >::type\n  spread(const Type& e, Index n) {\n    Array<1,Type,false> arr(n);\n    arr = e;\n    return arr;\n  }\n  */\n\n}\n\n\n#endif\n"
  },
  {
    "path": "include/adept/store_transpose.h",
    "content": "/* store_transpose.h -- Store the transpose of a vector of Packets\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n   Vectorization of active expressions involves storage of the\n   gradients in an object of type ScratchVector<N,Packet<Real>>, which\n   we need to transpose when placing on the stack.\n\n*/\n\n#ifndef StoreTranspose_H\n#define StoreTranspose_H 1\n\n#include <adept/Packet.h>\n#include <adept/ScratchVector.h>\n\n\nnamespace adept {\n\n  namespace internal {\n\n    // Unvectorized version\n    template <int Len, typename Type>\n    void\n    store_transpose(ScratchVector<Len,Packet<Type> >& src, Type* dest) {\n      for (int i = 0; i < Len; ++i) {\n\tunion {\n\t  typename Packet<Type>::intrinsic_type packet;\n\t  Type array[Packet<Type>::size];\n\t} u;\n\tu.packet = src[i];\n\tfor (int j = 0; j < Packet<Type>::size; ++j) {\n\t  dest[j*Len] = u.array[j];\n\t}\n\t++dest;\n      }\n    }\n\n  }\n}\n\n\n#endif\n"
  },
  {
    "path": "include/adept/traits.h",
    "content": "/* traits.h -- Traits used to support array/automatic differentiation expressions\n\n    Copyright (C) 2012-2014 University of Reading\n    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptTraits_H\n#define AdeptTraits_H 1\n\n#include <complex>\n#include <limits>\n#include <iostream>\n\n#include <adept/base.h>\n\n#ifdef ADEPT_CXX11_FEATURES\n#include <initializer_list>\n#endif\n\nnamespace adept {\n\n  // Forward declaration of \"Active\"\n  template <typename T> class Active;\n\n\n  // All traits are in the adept::internal namespace.  Note that many\n  // of these are part of the STL in C++11 but are needed so that\n  // Adept can be used with C++98 compilers.\n  namespace internal {\n\n    // ----- CONTENTS -----\n    // 1. ADEPT_STATIC_ASSERT\n    // 2. enable_if\n    // 3. if_then_else\n    // 4. is_not_expression\n    // 5. is_complex\n    // 6. is_active\n    // 7. is_array\n    // 8. is_scalar_int\n    // 9. all_scalar_ints\n    // 10. underlying_real\n    // 11. underlying_passive\n    // 12. promote\n    // 13. rank_compatible\n    // 14. is_same\n    // 15. remove_reference\n    // 16. initializer_list_rank\n    // 17. matrix_op_defined\n    // 18. is_floating_point\n    // --------------------\n\n    // ---------------------------------------------------------------------\n    // 1. ADEPT_STATIC_ASSERT\n    // ---------------------------------------------------------------------\n\n    // Heavily templated C++ code as in the Adept library can produce\n    // very long and cryptic compiler error messages. This macro is\n    // useful to check for conditions that should not happen. It check\n    // a bool known at compile time is true, otherwise fail to compile\n    // with a message that is hopefully understandable.\n    // E.g. 
ADEPT_STATIC_ASSERT(0 > 1, ZERO_IS_NOT_GREATER_THAN_ONE)\n    // would fail at compile time with a message containing\n    // ERROR_ZERO_IS_NOT_GREATER_THAN_ONE, which should hopefully\n    // stand out even in a long error message.\n\n    // Helper class\n    template<bool> struct compile_time_check \n    { typedef int STATIC_ASSERTION_HAS_FAILED; };\n    template<> struct compile_time_check<false> { };\n\n    // Define the macro in which a struct is defined that inherits\n    // from compile_time_check\n#if defined(__GNUC__) && !defined(__INTEL_COMPILER)\n#pragma GCC diagnostic ignored \"-Wpragmas\"\n#pragma GCC diagnostic ignored \"-Wunused-local-typedefs\"\n#pragma GCC diagnostic warning \"-Wpragmas\"\n#endif\n#define ADEPT_STATIC_ASSERT(condition, msg)\t\t\t\t\\\n    do { struct ERROR_##msg : public ::adept::internal::compile_time_check<(condition)> { }; \\\n\ttypedef typename ERROR_##msg ::STATIC_ASSERTION_HAS_FAILED type; \\\n    } while (0)\n\n    // ---------------------------------------------------------------------\n    // 2. enable_if\n    // ---------------------------------------------------------------------\n\n    // To enable a function \"Type function()\" only if CONDITION is\n    // true, replace \"Type\" in the function declaration with \"typename\n    // enable_if<CONDITIONAL,Type>::type\"\n    template <bool, typename T = void> struct enable_if { };\n    // Partial specialization for true.\n    template <typename T> struct enable_if<true, T> { typedef T type; };\n\n\n    // ---------------------------------------------------------------------\n    // 3. if_then_else\n    // ---------------------------------------------------------------------\n\n    // \"if_then_else<CONDITION, YES, NO>::type\" resolves to YES if\n    // CONDITION is \"true\", NO otherwise. 
A limitation is that both Y\n    // and N must be valid types\n    template <bool, typename Y, typename N>\n    struct if_then_else { typedef Y type; };\n\n    template <typename Y, typename N>\n    struct if_then_else<false, Y, N> { typedef N type; };\n\n\n    // ---------------------------------------------------------------------\n    // 4. is_not_expression\n    // ---------------------------------------------------------------------\n\n    // The following enables us to provide functions that work only on\n    // types *not* derived from the Expression struct:\n    // \"is_not_expression<E>::value\" is \"false\" if E is not an\n    // expression and \"true\" otherwise\n    template <typename T>\n    struct is_not_expression\n    {\n    private:\n      typedef char yes;\n      typedef struct { char array[2]; } no;\n      template <typename C> static yes test(typename C::_adept_expression_flag*);\n      template <typename C> static no  test(...);\n    public:\n      static const bool value = sizeof(test<T>(0)) != sizeof(yes);\n    };\n\n\n    // ---------------------------------------------------------------------\n    // 5. is_complex\n    // ---------------------------------------------------------------------\n\n    // Test for complex numbers: \"is_complex<S>::value\" is \"true\" if S\n    // is complex, \"false\" otherwise\n    template <typename> struct is_complex\n    { static const bool value = false; };\n    template <> struct is_complex<std::complex<float> > \n    { static const bool value = true; };\n    template <> struct is_complex<std::complex<double> > \n    { static const bool value = true; };\n    template <> struct is_complex<std::complex<long double> > \n    { static const bool value = true; };\n\n\n    // ---------------------------------------------------------------------\n    // 6. 
is_active\n    // ---------------------------------------------------------------------\n\n    // Test for active numbers: \"is_active<S>::value\" is \"true\" if S\n    // is active, \"false\" otherwise.\n    // Then the default case for non-expressions returns false\n    \n    template <typename T> struct expr_cast; // Forward declaration\n\n    template <typename T, class Enable = void>\n    struct is_active { };\n\n    template <typename T>\n    struct is_active<T, typename enable_if<is_not_expression<T>::value>::type>\n    { static const bool value = false; };\n    \n    // Expressions define a static const bool called \"is_active\"\n    template <typename T>\n    struct is_active<T, typename enable_if<!is_not_expression<T>::value>::type>\n    { static const bool value = expr_cast<T>::is_active; };\n    \n\n    // ---------------------------------------------------------------------\n    // 7. is_array\n    // ---------------------------------------------------------------------\n    \n    /*\n    // \"is_array<E>::value\" is \"true\" if E is an array expression and\n    // \"false\" otherwise.  The default case for non-expressions\n    // returns false\n    template <typename T, class Enable = void>\n    struct is_array { };\n    template <typename T>\n    struct is_array<T, typename enable_if<is_not_expression<T>::value>::type>\n    { static const bool value = false; };\n    // Expressions define a static const bool called \"is_array\"\n    template <typename T>\n    struct is_array<T, typename enable_if<!is_not_expression<T>::value>::type>\n    { static const bool value = T::is_array; };\n    */\n\n    // ---------------------------------------------------------------------\n    // 8. 
is_scalar_int\n    // ---------------------------------------------------------------------\n\n    // Return whether template argument is of integer type, or is a\n    // 0-dimensional expression of integer type\n    template <typename T, class Enable = void>\n    struct is_scalar_int { };\n    \n    template <typename T>\n    struct is_scalar_int<T, \n\t      typename enable_if<is_not_expression<T>::value>::type> {\n      static const bool value = std::numeric_limits<T>::is_integer;\n      static const int  count = value;\n    };\n    \n    template <typename T>\n    struct is_scalar_int<T, \n\t      typename enable_if<!is_not_expression<T>::value>::type>\n    {\n      static const bool value\n      = std::numeric_limits<typename T::type>::is_integer\n\t&& expr_cast<T>::rank == 0; \n      static const int  count = value;\n    };\n\n\n    // ---------------------------------------------------------------------\n    // 9. all_scalar_ints\n    // ---------------------------------------------------------------------\n\n    // all_scalar_ints<Rank,I0,I1...>::value returns true if I[0] to\n    // I[Rank-1] are all scalar integers\n\n    // First define a \"null\" type\n    struct null_type { };\n    template <typename T> struct is_null_type { \n      static const bool value = false; \n      static const int  count = 0; \n    };\n    template <> struct is_null_type<null_type>{\n      static const bool value = true; \n      static const int  count = 1;\n    };\n\n    template <int Rank, typename I0, typename I1 = null_type, \n\t      typename I2 = null_type, typename I3 = null_type,\n\t      typename I4 = null_type, typename I5 = null_type,\n\t      typename I6 = null_type>\n    struct all_scalar_ints {\n      static const bool value = (Rank == (is_scalar_int<I0>::count\n\t\t\t\t\t  +is_scalar_int<I1>::count\n\t\t\t\t\t  +is_scalar_int<I2>::count\n\t\t\t\t\t  +is_scalar_int<I3>::count\n\t\t\t\t\t  +is_scalar_int<I4>::count\n\t\t\t\t\t  
+is_scalar_int<I5>::count\n\t\t\t\t\t  +is_scalar_int<I6>::count));\n    };\n\n\n\n    // ---------------------------------------------------------------------\n    // 10. underlying_real\n    // ---------------------------------------------------------------------\n  \n    // Return the underlying real type for a complex argument:\n    // \"underlying_real<S>::type returns T if S is of type\n    // std::complex<T>, or returns S if it is not complex\n    /*\n    template <typename T>\n    struct underlying_real\n    {\n    private:\n      template <bool, typename S>\n      struct _underlying_real\n      { typedef S type; };\n      template <typename S>\n      struct _underlying_real<true, S>\n      { typedef typename S::type type; };\n    public:\n      typedef typename _underlying_real<is_complex<T>::value,\n\t\t\t\t\tT>::type type;\n    };\n    */\n    template <typename T>\n    struct underlying_real {\n      typedef T type;\n    };\n    template <typename T>\n    struct underlying_real<std::complex<T> > {\n      typedef T type;\n    };\n\t\n    // ---------------------------------------------------------------------\n    // 11. underlying_passive\n    // ---------------------------------------------------------------------\n  \n    // Return the underlying passive type for an active argument:\n    // \"underlying_passive<S>::type returns T if S is of type\n    // adept::Active<T>, or returns S if it is not active.\n    template <typename T>\n    struct underlying_passive\n    {\n    private:\n      template <bool, typename S>\n      struct _underlying_passive\n      { typedef S type; };\n      template <typename S>\n      struct _underlying_passive<true, S>\n      { typedef typename S::type type; };\n    public:\n      typedef typename _underlying_passive<is_active<T>::value,\n\t\t\t\t\tT>::type type;\n    };\n    \n\n    // ---------------------------------------------------------------------\n    // 12. 
promote\n    // ---------------------------------------------------------------------\n  \n    // \"promote<L,R>::type\" returns the type that a binary operation\n    // (e.g. multiplication) between types L and R should result in.\n    // Note that \"complexity\" and \"precision\" are promoted separately,\n    // so double + std::complex<float> will result in an object of\n    // type std::complex<double> >.\n    template <typename L, typename R>\n    struct promote {\n    private:\n      template <typename A, typename B>\n      struct promote_primitive {\n\tstatic const bool A_bigger_than_B = (sizeof(A) > sizeof(B));\n\tstatic const bool A_float_B_int = (!std::numeric_limits<A>::is_integer) \n\t  && std::numeric_limits<B>::is_integer;\n\tstatic const bool A_int_B_float = std::numeric_limits<A>::is_integer\n\t  && (!std::numeric_limits<B>::is_integer);\n\tstatic const bool prefer_float = A_float_B_int || A_int_B_float;\n\ttypedef typename if_then_else<A_float_B_int, A, B>::type float_type;\n\ttypedef typename if_then_else<A_bigger_than_B, A, B>::type biggest_type;\n\ttypedef typename if_then_else<prefer_float, float_type, biggest_type>::type type;\n      };\n      \n      typedef typename promote_primitive<\n        typename underlying_real<typename underlying_passive<L>::type>::type,\n\ttypename underlying_real<typename underlying_passive<R>::type>::type>::type real;\n      typedef typename if_then_else<is_complex<L>::value\n\t\t\t\t    || is_complex<R>::value,\n\t\t\t\t    std::complex<real>,\n\t\t\t\t    real>::type complex_type;\n    public: \n      typedef typename if_then_else<is_active<L>::value || is_active<R>::value,\n\t\t\t\t    adept::Active<complex_type>, \n\t\t\t\t    complex_type>::type type;\n    };\n\n    // If ever the template arguments are the same\n    // (e.g. 
Packet<double>), we simply return this type\n    template <typename T>\n    struct promote<T,T> {\n      typedef T type;\n    };\n\n  \n    // ---------------------------------------------------------------------\n    // 13. rank_compatible\n    // ---------------------------------------------------------------------\n\n    // Check that an array of rank LRank could enter an operation\n    // (e.g. addition) with an array of rank RRank: the two ranks must\n    // either be the same, or either can be zero\n    template <int LRank, int RRank>\n    struct rank_compatible {\n      static const bool value = (LRank == RRank || LRank == 0 || RRank == 0);\n    };\n\n\n    // ---------------------------------------------------------------------\n    // 14. is_same\n    // ---------------------------------------------------------------------\n\n    // Compare two types to see if they're the same\n    template<typename T, typename U>\n    struct is_same { static const bool value = false;  };\n    \n    template<typename T>\n    struct is_same<T,T>  { static const bool value = true; };\n    \n\n    // ---------------------------------------------------------------------\n    // 15. remove_reference\n    // ---------------------------------------------------------------------\n\n    // Remove reference from a type if present\n    template<typename T>  struct remove_reference { typedef T type; };\n    template<typename T>  struct remove_reference<T&> { typedef T type; };\n\n\n    // ---------------------------------------------------------------------\n    // 16. 
initializer_list_rank\n    // ---------------------------------------------------------------------\n#ifdef ADEPT_CXX11_FEATURES\n\n    // initializer_link_rank<T>::value returns 0 if T is not a\n    // std:initializer_list, otherwise it returns the number of nested\n    // std::initializer_list's\n    template <typename T> struct is_initializer_list \n    { static const bool value = false; };\n    template <typename T> struct is_initializer_list<std::initializer_list<T> >\n    { static const bool value = true; };\n\n    template <typename T, class Enable = void>\n    struct initializer_list_rank { };\n\n    template <typename T>\n    struct initializer_list_rank<T,\n\t\t\t\t typename enable_if<!is_initializer_list<T>::value>::type>\n    { typedef T type;\n      static const int value = 0; };\n    \n    template <typename T>\n    struct initializer_list_rank<std::initializer_list<T>,\n\t\t\t\t typename enable_if<!is_initializer_list<T>::value>::type>\n    { typedef T type;\n      static const int value = 1; };\n\n    template <typename T>\n    struct initializer_list_rank<std::initializer_list<T>,\n\t\t\t\t typename enable_if<is_initializer_list<T>::value>::type>\n    { typedef typename initializer_list_rank<T>::type type;\n      static const int value = 1 + initializer_list_rank<T>::value; };\n\n#endif\n\n    // ---------------------------------------------------------------------\n    // 17. matrix_op_defined\n    // ---------------------------------------------------------------------\n\n    // Return true if a type is float or double, false otherwise\n    template <typename T>\n    struct matrix_op_defined { static const bool value = false;  };\n    \n    template <>\n    struct matrix_op_defined<float>  { static const bool value = true; };\n\n    template <>\n    struct matrix_op_defined<double>  { static const bool value = true; };\n \n    // ---------------------------------------------------------------------\n    // 18. 
is_floating_point\n    // ---------------------------------------------------------------------\n\n    template <typename T>\n    struct is_floating_point { static const bool value = false; };\n\n    template <>\n    struct is_floating_point<float> { static const bool value = true; };\n    template <>\n    struct is_floating_point<double> { static const bool value = true; };\n    template <>\n    struct is_floating_point<long double> { static const bool value = true; };\n\n  } // End namespace internal\n\n} // End namespace adept\n\n\n\n#endif\n"
  },
  {
    "path": "include/adept/vector_utilities.h",
    "content": "/* vector_utilities.h -- Vector utility functions\n\n    Copyright (C) 2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptVectorUtilities_H\n#define AdeptVectorUtilities_H\n\n#include <adept/Array.h>\n\nnamespace adept {\n\n  Array<1,Real,false> linspace(Real x1, Real x2, Index n);\n\n}\n\n#endif\n"
  },
  {
    "path": "include/adept/where.h",
    "content": "/* where.h -- Support for Fortran-90-like \"where\" construct\n\n    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n  \n   Consider the following:\n\n     A.where(B) = C;\n     A.where(B) = either_or(C, D);\n\n   where A is an Array, B is a boolean expression, and C and D are\n   expressions, and the arrays and expressions have the same rank and\n   size, except that C and or D may have rank zero. The first line has\n   the effect of setting every element of A for which B is true to the\n   corresponding value in C. The second line does this but for\n   elements where B is false it sets A instead to D.\n\n   \n\n*/\n\n\n#ifndef AdeptWhere_H\n#define AdeptWhere_H 1\n\n#include <vector>\n\n#include <adept/Expression.h>\n\nnamespace adept {\n\n  namespace internal {\n\n\n    // ---------------------------------------------------------------------\n    // Section 1. EitherOr object returned by either_or function\n    // ---------------------------------------------------------------------\n    template <class C, class D>\n    class EitherOr {\n    public:\n      typedef bool _adept_either_or_flag;\n      EitherOr(const C& c, const D& d) : either_(c), or_(d) { }\n      const C& value_if_true() const { return either_; }\n      const D& value_if_false() const { return or_; }\n    protected:\n      const C& either_;\n      const D& or_;\n    };\n\n\n    template <typename T>\n    struct is_not_either_or\n    {\n    private:\n      typedef char yes;\n      typedef struct { char array[2]; } no;\n      template <typename C> static yes test(typename C::_adept_either_or_flag*);\n      template <typename C> static no  test(...);\n    public:\n      static const bool value = sizeof(test<T>(0)) != sizeof(yes);\n    };\n\n\n    // ---------------------------------------------------------------------\n    // Section 2. 
Where class returned by A.where(B)\n    // ---------------------------------------------------------------------\n    template <class A, class B>\n    class Where {\n    public:\n      Where(A& a, const B& b) : array_(a), bool_expr_(b) { }\n\n      template <class C>\n      typename enable_if<is_not_either_or<C>::value, Where&>::type\n      operator=(const C& c) {\n\tarray_.assign_conditional(bool_expr_, c);\n\treturn *this;\n      }\n\n      // With either_or on the right-hand-side: this implementation\n      // could be faster if bool_expr was not evaluated twice\n      template <class C>\n      typename enable_if<!is_not_either_or<C>::value, Where&>::type\n      operator=(const C& c) {\n\tarray_.assign_conditional(!const_cast<B&>(bool_expr_), c.value_if_false());\n\tarray_.assign_conditional(bool_expr_,  c.value_if_true());\n\treturn *this;\n      }\n\n#define ADEPT_WHERE_OPERATOR(EQ_OP, OP)\t\t\t\t\t\\\n      template <class C>\t\t\t\t\t\t\\\n      typename enable_if<is_not_either_or<C>::value, Where&>::type\t\\\n      EQ_OP(const C& c) {\t\t\t\t\t\t\\\n\tarray_.assign_conditional(bool_expr_, noalias(*this) OP c);\t\\\n        return *this;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\\\n      template <class C>\t\t\t\t\t\t\\\n      typename enable_if<!is_not_either_or<C>::value, Where&>::type\t\\\n      EQ_OP(const C& c) {\t\t\t\t\t\t\\\n\tarray_.assign_conditional(!const_cast<B&>(bool_expr_),\t\t\\\n\t\t\t\t  noalias(*this) OP c.value_if_false()); \\\n\tarray_.assign_conditional(bool_expr_,\t\t\t\t\\\n\t\t\t\t  noalias(*this) OP c.value_if_true()); \\\n\treturn *this;\t\t\t\t\t\t\t\\\n      }\t\t\t\t\t\t\t\t\t\n      ADEPT_WHERE_OPERATOR(operator+=, +)\n      ADEPT_WHERE_OPERATOR(operator-=, -)\n      ADEPT_WHERE_OPERATOR(operator*=, *)\n      ADEPT_WHERE_OPERATOR(operator/=, /)\n#undef ADEPT_WHERE_OPERATOR\n\n    protected:\n      A& array_;\n      const B& bool_expr_;\n\n    };\n\n  } // end namespace internal\n\n\n\n  template <class C, class D>\n  
internal::EitherOr<C,D> either_or(const C& c, const D& d) {\n    return internal::EitherOr<C,D>(c, d);\n  }\n\n} // end namespace adept\n\n#endif \n"
  },
  {
    "path": "include/adept.h",
    "content": "/* adept.h -- Header file for basic scalar functionality of Adept automatic differentiation library\n\n    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef Adept_H\n#define Adept_H 1\n\n#include <adept/settings.h>\n#include <adept/UnaryOperation.h>\n#include <adept/BinaryOperation.h>\n#include <adept/Active.h>\n#include <adept/scalar_shortcuts.h>\n\n#endif\n"
  },
  {
    "path": "include/adept_arrays.h",
    "content": "/* adept_arrays.h -- Header file for array functionality of Adept automatic differentiation library\n\n    Copyright (C) 2014-2015 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptArrays_H\n#define AdeptArrays_H 1\n\n#include <adept.h>\n\n#include <adept/Array.h>\n#include <adept/FixedArray.h>\n#include <adept/reduce.h>\n#include <adept/matmul.h>\n#include <adept/solve.h>\n#include <adept/inv.h>\n#include <adept/Allocator.h>\n#include <adept/interp.h>\n#include <adept/spread.h>\n#include <adept/outer_product.h>\n#include <adept/eval.h>\n#include <adept/array_shortcuts.h>\n#include <adept/vector_utilities.h>\n\n#endif\n"
  },
  {
    "path": "include/adept_fortran.h",
    "content": "/* adept_fortran.h -- Interoperability between Adept and Fortran-90 arrays\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n\n   Fortran-90 introduced multi-dimensional arrays with essentially the\n   same basic capabilities as passive Adept arrays, including the\n   ability to index strided data in memory. The improved\n   interoperability features of Fortran 2018 enable Fortran array data\n   to be passed to and from C/C++. This header file enables passive\n   Adept arrays to be passed to and from Fortran.\n\n   PASSING ARRAYS FROM FORTRAN TO C++\n\n   A C++ subroutine callable from Fortran could be declared in C++ as:\n\n     extern \"C\"\n     void adept_subroutine(adept::FortranArray* int_arr,\n                           adept::FortranArray* dbl_arr);\n\n   where FortranArray is a C++ class wrapping the CFI_cdesc_t type\n   that contains the Fortran array descriptor. Within the definition\n   of this function, Adept arrays may be associated with the Fortran\n   data as follows:\n\n     adept::intMatrix imat;\n     adept::associate(imat, int_arr);\n     imat >>= int_arr; // Alternative form\n\n   In this example, the matrix of integers \"imat\" shares its data with\n   the Fortran array int_arr. An exception will be thrown if the\n   Fortran array is not of type integer and rank 2. Note that the\n   array indexing of imat will be in the standard C/C++ convention,\n   zero-based and with the final index varying fastest as memory is\n   traversed. This is opposite to the way the array is accessed in\n   Fortran.  
The \">>=\" provides a more succinct way to do the same\n   thing.\n\n   Consider the following: \n\n     adept::Matrix dmat;\n     adept::associate(dmat, dbl_arr, true);\n\n   Here, the third argument \"true\" indicates that the array strides of\n   dmat are to be configured so that the array indices are the same as\n   in Fortran (although still zero based). This will impede\n   optimization of some array expressions using dmat, since the second\n   dimension of dmat will not be contiguous in memory, and this is the\n   dimension that Adept attempts to vectorize.\n\n   PASSING ARRAYS FROM ADEPT TO FORTRAN\n\n   A Fortran-implemented subroutine could be declared in C++ as\n   follows:\n\n     extern \"C\"\n     void fort_subroutine(adept::FortranArray* int_arr,\n                          adept::FortranArray* dbl_arr);\n\n   To call this routine from C++, passing Adept arrays \"imat\" and\n   \"dmat\" as the arguments, we can do simply:\n\n     fort_subroutine(FortranArray(imat), FortranArray(dmat));\n\n*/\n\n\n#ifndef AdeptFortran_H\n#define AdeptFortran_H 1\n\n#include <complex>\n#include <adept_arrays.h>\n\n// GNU defines CFI_type_Bool as \"_Bool\", but this is only available in\n// C99, not C++, so we make it an alias for C++'s \"bool\"\n#ifdef __GNUC__\n#include <stdbool.h>\n#endif\n\n// Load the Fortran array interface into the global namespace\n#include <ISO_Fortran_binding.h>\n\nnamespace adept {\n\n  namespace internal {\n    // Helper types such that cfi_type<X>::type returns the integer\n    // type of \"X\", or fails to compile if it is not possible to send\n    // an array of type X to Fortran\n    template <typename Type> struct cfi_type\n    { }; // Fails to compile if attempt to access \"type\"\n    template <> struct cfi_type<char>\n    { static const CFI_type_t type = CFI_type_signed_char; };\n    template <> struct cfi_type<short>\n    { static const CFI_type_t type = CFI_type_short; };\n    template <> struct cfi_type<int>\n    { static const 
CFI_type_t type = CFI_type_int; };\n    template <> struct cfi_type<long>\n    { static const CFI_type_t type = CFI_type_long; };\n    template <> struct cfi_type<long long>\n    { static const CFI_type_t type = CFI_type_long_long; };\n    template <> struct cfi_type<bool>\n    { static const CFI_type_t type = CFI_type_Bool; };\n    template <> struct cfi_type<float>\n    { static const CFI_type_t type = CFI_type_float; };\n    template <> struct cfi_type<double>\n    { static const CFI_type_t type = CFI_type_double; };\n    template <> struct cfi_type<long double>\n    { static const CFI_type_t type = CFI_type_long_double; };\n    template <> struct cfi_type<std::complex<float> >\n    { static const CFI_type_t type = CFI_type_float_Complex; };\n    template <> struct cfi_type<std::complex<double> >\n    { static const CFI_type_t type = CFI_type_double_Complex; };\n    template <> struct cfi_type<std::complex<long double> >\n    { static const CFI_type_t type = CFI_type_long_double_Complex; };\n  }\n\n  // This class is essentially a wrapper around the CFI_cdesc_t type\n  // which stores a Fortran array descriptor which could be for an\n  // array of any rank or type\n  class FortranArray {\n\n  protected:\n    // Data: the Fortran array descriptor CFI_cdesc_t type, but the\n    // version configured for the maximum allowable Fortran rank\n    CFI_CDESC_T(CFI_MAX_RANK) ad;\n\n  public:\n    // This class either exists as a pointer to a Fortran array passed\n    // in from a Fortran routine, or as an object pointing to an Adept\n    // array that is about to be passed into a Fortran routine.\n    // Therefore it can only be constructed from an existing Adept\n    // array.\n    FortranArray() = delete;\n    \n    // Initialize from Adept array. 
By default, the dimensions will\n    // need to be accessed in opposite order in Fortran than in\n    // C++/Adept, reflecting the default column-major array access of\n    // the former and row-major array access of the latter. But by\n    // providing preserve_dim_order=true, the dimension access order\n    // will be preserved between the two.\n    template <int Rank, typename Type>\n    FortranArray(adept::Array<Rank,Type>& a,\n\t\t bool preserve_dim_order = false) {\n      init(a, preserve_dim_order);\n    }\n    // No way to ensure that Fortran cannot modify an array,\n    // unfortunately, so we need to cast away the const-ness\n    template <int Rank, typename Type>\n    FortranArray(const adept::Array<Rank,Type>& a,\n\t\t bool preserve_dim_order = false) {\n      init(const_cast<adept::Array<Rank,Type>&>(a), preserve_dim_order);\n    }\n\n  protected:\n    // Constructor implementation: initialize CFI_cdesc_t elements\n    // from Adept array\n    template <int Rank, typename Type>\n    void init(adept::Array<Rank,Type>& a, bool preserve_dim_order) {\n      ADEPT_STATIC_ASSERT(Rank <= CFI_MAX_RANK, ARRAY_RANK_EXCEEDS_FORTRAN_MAXIMUM);\n      ad.base_addr = static_cast<void*>(a.data());\n      ad.elem_len  = sizeof(Type);\n      ad.version   = CFI_VERSION;\n      ad.rank      = Rank;\n      ad.attribute = CFI_attribute_other;\n      ad.type      = internal::cfi_type<Type>::type;\n      if (!preserve_dim_order) {\n\tfor (int irank = 0; irank < Rank; ++irank) {\n\t  ad.dim[irank].lower_bound = 0;\n\t  ad.dim[irank].extent = a.dimension(Rank-irank-1);\n\t  ad.dim[irank].sm = a.offset(Rank-irank-1)*sizeof(Type);\n\t}\n      }\n      else {\n\tfor (int irank = 0; irank < Rank; ++irank) {\n\t  ad.dim[irank].lower_bound = 0;\n\t  ad.dim[irank].extent = a.dimension(irank);\n\t  ad.dim[irank].sm = a.offset(irank)*sizeof(Type);\n\t}\n      }\n    }\n\n  public:\n    // Query the rank and type of the Fortran array\n    int rank() const { return ad.rank; }\n    int 
type_code() const { return ad.type; }\n\n    // Return \"true\" if the rank or type equal the template parameters\n    // Rank and Type\n    template <int Rank>\n    bool is_rank() const {\n      return (Rank == ad.rank);\n    }\n    template <typename Type>\n    bool is_type() const {\n      return (internal::cfi_type<Type>::type == ad.type\n\t      && sizeof(Type) == ad.elem_len);\n    }\n\n    // Return the length or stride in memory of a particular dimension\n    CFI_index_t dimension(int idim) const { return ad.dim[idim].extent; }\n    CFI_index_t offset(int idim) const { return ad.dim[idim].sm/ad.elem_len; }\n    \n    // Throw an exception if the rank or type differ from the template\n    // parameters Rank and Type\n    template <int Rank, typename Type>\n    void verify() const {\n      if (!is_rank<Rank>()) {\n\tthrow fortran_interoperability_error(\n           \"Rank of Fortran array does not match expected rank\");\n      }\n      else if (!is_type<Type>()) {\n\tthrow fortran_interoperability_error(\n           \"Type of Fortran array does not match expected type\");\n      }\n    }\n\n    // Return a pointer to the underlying data casting to the\n    // specified Type\n    template <typename Type>\n    Type* data() {\n      return static_cast<Type*>(ad.base_addr);\n    }\n\n    // Allow this object to be passed to a function expecting a\n    // pointer\n    operator CFI_cdesc_t*() { return reinterpret_cast<CFI_cdesc_t*>(&ad); }\n    operator FortranArray*() { return this; }\n    \n  };\n\n  // Associate Adept array \"a\" with Fortran array \"fa\" so that\n  // subsequent changes to the elements of \"a\" will be seen within\n  // Fortran when the C++ routine returns.\n  template <int Rank, typename Type>\n  void associate(Array<Rank,Type>& a, FortranArray* fa,\n\t\t bool preserve_dim_order = false) {\n    fa->verify<Rank,Type>(); // Verify rank and type\n    ExpressionSize<Rank> dims, offs;\n    if (!preserve_dim_order) {\n      for (int irank = 0; 
irank < Rank; ++irank) {\n\tdims[Rank-irank-1] = fa->dimension(irank);\n\toffs[Rank-irank-1] = fa->offset(irank);\n      }\n    }\n    else {\n      for (int irank = 0; irank < Rank; ++irank) {\n\tdims[irank] = fa->dimension(irank);\n\toffs[irank] = fa->offset(irank);\n      }\n    }\n    a.clear();\n    a = Array<Rank,Type>(static_cast<Type*>(fa->data<Type>()), 0, dims, offs);\n  }\n\n  // Associate Adept array \"a\" with a general Fortran array descriptor\n  // \"cd\", noting that we only verify that the rank and type match\n  // when the \"associate\" function above is called.\n  template <int Rank, typename Type>\n  void associate(Array<Rank,Type>& a, CFI_cdesc_t* cd,\n\t\t bool preserve_dim_order = false) {\n    FortranArray* fa = reinterpret_cast<FortranArray*>(cd);\n    associate(a, fa, preserve_dim_order);\n  }\n\n  // Enable link of an Adept array to a Fortran array using the >>=\n  // operator\n  template<int Rank, typename Type>\n  void operator>>=(adept::Array<Rank,Type>& a, FortranArray* fa) {\n    associate(a,fa);\n  }\n  template<int Rank, typename Type>\n  void operator>>=(adept::Array<Rank,Type>& a, CFI_cdesc_t* cd) {\n    associate(a,cd);\n  }\n\n} // End namespace adept\n\n#endif\n"
  },
  {
    "path": "include/adept_optimize.h",
    "content": "/* adept_optimize.h -- Header file for optimization algorithms of Adept library\n\n    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n    This file is part of the Adept library.\n\n*/\n\n#ifndef AdeptOptimize_H\n#define AdeptOptimize_H 1\n\n#include <adept_arrays.h>\n\n#include <adept/Optimizable.h>\n#include <adept/Minimizer.h>\n\n#endif\n"
  },
  {
    "path": "include/create_adept_source_header",
    "content": "#!/bin/sh\n# This script creates a header file \"adept_source.h\" containing the\n# ../adept/*.h ../adept/*.cpp source files; why this is useful is explained below.\n\nADEPT_SOURCE_HEADER=adept_source.h\nrm -f $ADEPT_SOURCE_HEADER\n\necho \"Creating $ADEPT_SOURCE_HEADER\"\n\necho \"/* $ADEPT_SOURCE_HEADER - Source code for the Adept library\n\n  Copyright (C) 2012-2015 The University of Reading\n  Copyright (C) 2015-     European Centre for Medium-Range Weather Forecasts\n\n  Licensed under the Apache License, Version 2.0 (the \\\"License\\\"); you\n  may not use this file except in compliance with the License.  You\n  may obtain a copy of the License at\n\n      http://www.apache.org/licenses/LICENSE-2.0\n\n  Unless required by applicable law or agreed to in writing, software\n  distributed under the License is distributed on an \\\"AS IS\\\" BASIS,\n  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n  implied.  See the License for the specific language governing\n  permissions and limitations under the License.\n\n\n  This file was created automatically by script $0 \n  on \"$(date)\"\n\n  It contains a concatenation of the source files from the Adept\n  library. The idea is that a program may #include this file in one of\n  its source files (typically the one containing the main function),\n  and then the Adept library will be built into the executable without\n  the need to link to an external library. All other source files\n  should just #include <adept.h> or <adept_arrays.h>. The ability to\n  use Adept in this way makes it easier to distribute an Adept package\n  that is usable on non-Unix platforms that are unable to use the\n  autoconf configure script to build external libraries.\n\n  If HAVE_BLAS is defined below then matrix multiplication will be\n  enabled; the BLAS library should be provided at the link stage\n  although no header file is required.  
If HAVE_LAPACK is defined\n  below then linear algebra routines will be enabled (matrix inverse\n  and solving linear systems of equations); again, the LAPACK library\n  should be provided at the link stage although no header file is\n  required.\n\n*/\n\n/* Feel free to delete this warning: */\n#ifdef _MSC_FULL_VER \n#pragma message(\\\"warning: the adept_source.h header file has not been edited so BLAS matrix multiplication and LAPACK linear-algebra support have been disabled\\\")\n#else\n#warning \\\"The adept_source.h header file has not been edited so BLAS matrix multiplication and LAPACK linear-algebra support have been disabled\\\"\n#endif\n\n/* Uncomment this if you are linking to the BLAS library (header file\n   not required) to enable matrix multiplication */\n//#define HAVE_BLAS 1\n\n/* Uncomment this if you are linking to the LAPACK library (header\n   file not required) */\n//#define HAVE_LAPACK 1\n\n/* Uncomment this if you have the cblas.h header from OpenBLAS */\n//#define HAVE_OPENBLAS_CBLAS_HEADER\n\n/*\n\n  The individual source files now follow.\n\n*/\n\n#ifndef AdeptSource_H\n#define AdeptSource_H 1\n\n\" > $ADEPT_SOURCE_HEADER\n\nfor FILE in ../config_platform_independent.h ../adept/*.h ../adept/*.cpp\ndo\n    echo \"   Adding $FILE\"\n    echo \"\n\n// =================================================================\n// Contents of $(basename $FILE)\n// =================================================================\n\" >> $ADEPT_SOURCE_HEADER\n    cat $FILE >> $ADEPT_SOURCE_HEADER\ndone\n\necho \"\n\n#endif\n\" >> $ADEPT_SOURCE_HEADER\necho \"Done\"\n"
  },
  {
    "path": "m4/adept.m4",
    "content": "# ---------------------------------------------------------------------------\n# FILE         : adept.m4\n# COPYRIGHT    : 2018- ECMWF\n# AUTHOR       : Alessio Bozzo\n# LICENSE      : Apache License Version 2.0\n# ----------------------------------------------------------------------------\n#\n# This software is licensed under the terms of the Apache Licence\n# Version 2.0 which can be obtained at\n# http://www.apache.org/licenses/LICENSE-2.0. In applying this\n# licence, ECMWF does not waive the privileges and immunities granted\n# to it by virtue of its status as an intergovernmental organisation\n# nor does it submit to any jurisdiction.\n#\n# ----------------------------------------------------------------------------\n#\n# This file contains a macro processor (m4 file) to enable autotools\n# to locate the Adept C++ library (version 2.0 or greater).  The file\n# should be placed in the m4 directory of your package. If you have\n# aclocal.m4 in your top-level directory then it will be found\n# automatically; otherwise you will need the following in your\n# configure.ac file:\n#\n#   m4_include([m4/adept.m4])\n#\n# Usage is then as follows in the configure.ac file\n#\n#   AX_CHECK_ADEPT([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])\n#\n# for example:\n#\n#   AX_CHECK_ADEPT([have_adept=yes], [have_adept=no])\n#\n# This creates variables ADEPT_LDFLAGS and ADEPT_CPPFLAGS, and adds\n# them to LDFLAGS and CPPFLAGS.\n#\n# The macro looks for the Adept library in system directories, but the\n# user can specify another location by passing an argument to the\n# configure script as follows:\n#\n#   ./configure --with-adept=/home/me/apps/adept-2.1\n#\n# ----------------------------------------------------------------------------\n\ndnl defines a custom macro\nAC_DEFUN([AX_CHECK_ADEPT], [\n\n      dnl provides a framework to handle the --with-{arg} values passed to configure on the command line      \n      AC_ARG_WITH([adept],\n            
[AS_HELP_STRING([--with-adept=DIR], [use Adept Library from directory DIR])],\n            adept_prefix=\"$with_adept\"\n            []\n            )\n      \n      AS_IF([test x$adept_prefix != x],\n            [AS_IF([test -d \"$adept_prefix/lib\"],\n                  [ADEPT_LDFLAGS=\"-L$adept_prefix/lib -Wl,-rpath,$adept_prefix/lib -ladept\"\n                  ADEPT_CPPFLAGS=\"-I$adept_prefix/include\"],\n\t\t  [test -d \"$adept_prefix/lib64\"],\n                  [ADEPT_LDFLAGS=\"-L$adept_prefix/lib64 -Wl,-rpath,$adept_prefix/lib64 -ladept\"\n                  ADEPT_CPPFLAGS=\"-I$adept_prefix/include\"],\n                  [AC_MSG_ERROR([\n  -----------------------------------------------------------------------------\n     --with-adept=$adept_prefix is not a valid directory\n  -----------------------------------------------------------------------------])])],\n      [AC_MSG_WARN([\n  -----------------------------------------------------------------------------\n   Missing option `--with-adept=DIR`. 
Looking for Adept Library\n   into Linux default library search paths\n  -----------------------------------------------------------------------------])]\n           )\n     \n      LDFLAGS=\"$ADEPT_LDFLAGS $LDFLAGS\"\n      CPPFLAGS=\"$ADEPT_CPPFLAGS $CPPFLAGS\"\n      ax_have_adept=yes\n      dnl checks for ADEPT\n      AC_MSG_CHECKING([for Adept >= 2.0.4: including adept_arrays.h and linking via -ladept])\n      AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <adept_arrays.h>\n      #include <string>\n      #if ADEPT_VERSION < 20004\n      #error \"Adept version >= 2.0.4 required\"\n      #endif],[std::string test = adept::compiler_version()])],AC_MSG_RESULT([yes]),AC_MSG_RESULT([no])\n      AC_MSG_ERROR([Unable to find Adept library version >= 2.0.4]))\n\n      AS_IF([test \"x$ax_have_adept\" = xyes],\n            dnl outputing Adept Library\n            [AC_SUBST([ADEPT_LDFLAGS])\n            AC_SUBST([ADEPT_CPPFLAGS])\n            $1],\n            [$2])\n      ]\n)\ndnl vim:set softtabstop=4 shiftwidth=4 expandtab:\n"
  },
  {
    "path": "m4/ax_blas.m4",
    "content": "# ===========================================================================\n#          http://www.gnu.org/software/autoconf-archive/ax_blas.html\n# ===========================================================================\n#\n# SYNOPSIS\n#\n#   AX_BLAS([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])\n#\n# DESCRIPTION\n#\n#   This macro looks for a library that implements the BLAS linear-algebra\n#   interface (see http://www.netlib.org/blas/). On success, it sets the\n#   BLAS_LIBS output variable to hold the requisite library linkages.\n#\n#   To link with BLAS, you should link with:\n#\n#     $BLAS_LIBS $LIBS $FLIBS\n#\n#   in that order. FLIBS is the output variable of the\n#   AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is\n#   sometimes necessary in order to link with F77 libraries. Users will also\n#   need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same\n#   reason.\n#\n#   Many libraries are searched for, from ATLAS to CXML to ESSL. The user\n#   may also use --with-blas=<lib> in order to use some specific BLAS\n#   library <lib>. In order to link successfully, however, be aware that you\n#   will probably need to use the same Fortran compiler (which can be set\n#   via the F77 env. var.) as was used to compile the BLAS library.\n#\n#   ACTION-IF-FOUND is a list of shell commands to run if a BLAS library is\n#   found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is\n#   not found. If ACTION-IF-FOUND is not specified, the default action will\n#   define HAVE_BLAS.\n#\n# LICENSE\n#\n#   Copyright (c) 2008 Steven G. 
Johnson <stevenj@alum.mit.edu>\n#\n#   This program is free software: you can redistribute it and/or modify it\n#   under the terms of the GNU General Public License as published by the\n#   Free Software Foundation, either version 3 of the License, or (at your\n#   option) any later version.\n#\n#   This program is distributed in the hope that it will be useful, but\n#   WITHOUT ANY WARRANTY; without even the implied warranty of\n#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General\n#   Public License for more details.\n#\n#   You should have received a copy of the GNU General Public License along\n#   with this program. If not, see <http://www.gnu.org/licenses/>.\n#\n#   As a special exception, the respective Autoconf Macro's copyright owner\n#   gives unlimited permission to copy, distribute and modify the configure\n#   scripts that are the output of Autoconf when processing the Macro. You\n#   need not follow the terms of the GNU General Public License when using\n#   or distributing such scripts, even though portions of the text of the\n#   Macro appear in them. The GNU General Public License (GPL) does govern\n#   all other use of the material that constitutes the Autoconf Macro.\n#\n#   This special exception to the GPL applies to versions of the Autoconf\n#   Macro released by the Autoconf Archive. 
When you make and distribute a\n#   modified version of the Autoconf Macro, you may extend this special\n#   exception to the GPL to apply to your modified version as well.\n\n#serial 14\n\nAU_ALIAS([ACX_BLAS], [AX_BLAS])\nAC_DEFUN([AX_BLAS], [\nAC_PREREQ(2.50)\nAC_REQUIRE([AC_F77_LIBRARY_LDFLAGS])\nAC_REQUIRE([AC_CANONICAL_HOST])\nax_blas_ok=no\n\nAC_ARG_WITH(blas,\n\t[AS_HELP_STRING([--with-blas=<lib>], [use BLAS library <lib>])])\ncase $with_blas in\n\tyes | \"\") ;;\n\tno) ax_blas_ok=disable ;;\n\t-* | */* | *.a | *.so | *.so.* | *.o) BLAS_LIBS=\"$with_blas\" ;;\n\t*) BLAS_LIBS=\"-l$with_blas\" ;;\nesac\n\n# Get fortran linker names of BLAS functions to check for.\nAC_F77_FUNC(sgemm)\nAC_F77_FUNC(dgemm)\n\nax_blas_save_LIBS=\"$LIBS\"\nLIBS=\"$LIBS $FLIBS\"\n\n# First, check BLAS_LIBS environment variable\nif test $ax_blas_ok = no; then\nif test \"x$BLAS_LIBS\" != x; then\n\tsave_LIBS=\"$LIBS\"; LIBS=\"$BLAS_LIBS $LIBS\"\n\tAC_MSG_CHECKING([for $sgemm in $BLAS_LIBS])\n\tAC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes], [BLAS_LIBS=\"\"])\n\tAC_MSG_RESULT($ax_blas_ok)\n\tLIBS=\"$save_LIBS\"\nfi\nfi\n\n# BLAS linked to by default?  (happens on some supercomputers)\nif test $ax_blas_ok = no; then\n\tsave_LIBS=\"$LIBS\"; LIBS=\"$LIBS\"\n\tAC_MSG_CHECKING([if $sgemm is being linked in already])\n\tAC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes])\n\tAC_MSG_RESULT($ax_blas_ok)\n\tLIBS=\"$save_LIBS\"\nfi\n\n# BLAS in OpenBLAS library? (http://xianyi.github.com/OpenBLAS/)\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(openblas, $sgemm, [ax_blas_ok=yes\n\t\t\t                BLAS_LIBS=\"-lopenblas\"])\nfi\n\n# BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(atlas, ATL_xerbla,\n\t\t[AC_CHECK_LIB(f77blas, $sgemm,\n\t\t[AC_CHECK_LIB(cblas, cblas_dgemm,\n\t\t\t[ax_blas_ok=yes\n\t\t\t BLAS_LIBS=\"-lcblas -lf77blas -latlas\"],\n\t\t\t[], [-lf77blas -latlas])],\n\t\t\t[], [-latlas])])\nfi\n\n# BLAS in PhiPACK libraries? 
(requires generic BLAS lib, too)\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(blas, $sgemm,\n\t\t[AC_CHECK_LIB(dgemm, $dgemm,\n\t\t[AC_CHECK_LIB(sgemm, $sgemm,\n\t\t\t[ax_blas_ok=yes; BLAS_LIBS=\"-lsgemm -ldgemm -lblas\"],\n\t\t\t[], [-lblas])],\n\t\t\t[], [-lblas])])\nfi\n\n# BLAS in Intel MKL library?\nif test $ax_blas_ok = no; then\n\t# MKL for gfortran\n\tif test x\"$ac_cv_fc_compiler_gnu\" = xyes; then\n\t\t# 64 bit\n\t\tif test $host_cpu = x86_64; then\n\t\t\tAC_CHECK_LIB(mkl_gf_lp64, $sgemm,\n\t\t\t[ax_blas_ok=yes;BLAS_LIBS=\"-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread\"],,\n\t\t\t[-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread])\n\t\t# 32 bit\n\t\telif test $host_cpu = i686; then\n\t\t\tAC_CHECK_LIB(mkl_gf, $sgemm,\n\t\t\t\t[ax_blas_ok=yes;BLAS_LIBS=\"-lmkl_gf -lmkl_sequential -lmkl_core -lpthread\"],,\n\t\t\t\t[-lmkl_gf -lmkl_sequential -lmkl_core -lpthread])\n\t\tfi\n\t# MKL for other compilers (Intel, PGI, ...?)\n\telse\n\t\t# 64-bit\n\t\tif test $host_cpu = x86_64; then\n\t\t\tAC_CHECK_LIB(mkl_intel_lp64, $sgemm,\n\t\t\t\t[ax_blas_ok=yes;BLAS_LIBS=\"-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread\"],,\n\t\t\t\t[-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread])\n\t\t# 32-bit\n\t\telif test $host_cpu = i686; then\n\t\t\tAC_CHECK_LIB(mkl_intel, $sgemm,\n\t\t\t\t[ax_blas_ok=yes;BLAS_LIBS=\"-lmkl_intel -lmkl_sequential -lmkl_core -lpthread\"],,\n\t\t\t\t[-lmkl_intel -lmkl_sequential -lmkl_core -lpthread])\n\t\tfi\n\tfi\nfi\n# Old versions of MKL\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(mkl, $sgemm, [ax_blas_ok=yes;BLAS_LIBS=\"-lmkl -lguide -lpthread\"],,[-lguide -lpthread])\nfi\n\n# BLAS in Apple vecLib library?\nif test $ax_blas_ok = no; then\n\tsave_LIBS=\"$LIBS\"; LIBS=\"-framework vecLib $LIBS\"\n\tAC_MSG_CHECKING([for $sgemm in -framework vecLib])\n\tAC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes;BLAS_LIBS=\"-framework vecLib\"])\n\tAC_MSG_RESULT($ax_blas_ok)\n\tLIBS=\"$save_LIBS\"\nfi\n\n# BLAS in Alpha CXML library?\nif 
test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(cxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS=\"-lcxml\"])\nfi\n\n# BLAS in Alpha DXML library? (now called CXML, see above)\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(dxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS=\"-ldxml\"])\nfi\n\n# BLAS in Sun Performance library?\nif test $ax_blas_ok = no; then\n\tif test \"x$GCC\" != xyes; then # only works with Sun CC\n\t\tAC_CHECK_LIB(sunmath, acosp,\n\t\t\t[AC_CHECK_LIB(sunperf, $sgemm,\n\t\t\t\t[BLAS_LIBS=\"-xlic_lib=sunperf -lsunmath\"\n                                 ax_blas_ok=yes],[],[-lsunmath])])\n\tfi\nfi\n\n# BLAS in SCSL library?  (SGI/Cray Scientific Library)\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(scs, $sgemm, [ax_blas_ok=yes; BLAS_LIBS=\"-lscs\"])\nfi\n\n# BLAS in SGIMATH library?\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(complib.sgimath, $sgemm,\n\t\t     [ax_blas_ok=yes; BLAS_LIBS=\"-lcomplib.sgimath\"])\nfi\n\n# BLAS in IBM ESSL library? (requires generic BLAS lib, too)\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(blas, $sgemm,\n\t\t[AC_CHECK_LIB(essl, $sgemm,\n\t\t\t[ax_blas_ok=yes; BLAS_LIBS=\"-lessl -lblas\"],\n\t\t\t[], [-lblas $FLIBS])])\nfi\n\n# Generic BLAS library?\nif test $ax_blas_ok = no; then\n\tAC_CHECK_LIB(blas, $sgemm, [ax_blas_ok=yes; BLAS_LIBS=\"-lblas\"])\nfi\n\nAC_SUBST(BLAS_LIBS)\n\nLIBS=\"$ax_blas_save_LIBS\"\n\n# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:\nif test x\"$ax_blas_ok\" = xyes; then\n        ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1])\n        :\nelse\n        ax_blas_ok=no\n        $2\nfi\n])dnl AX_BLAS\n"
  },
  {
    "path": "m4/ax_lapack.m4",
    "content": "# ===========================================================================\n#         http://www.gnu.org/software/autoconf-archive/ax_lapack.html\n# ===========================================================================\n#\n# SYNOPSIS\n#\n#   AX_LAPACK([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])\n#\n# DESCRIPTION\n#\n#   This macro looks for a library that implements the LAPACK linear-algebra\n#   interface (see http://www.netlib.org/lapack/). On success, it sets the\n#   LAPACK_LIBS output variable to hold the requisite library linkages.\n#\n#   To link with LAPACK, you should link with:\n#\n#     $LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS\n#\n#   in that order. BLAS_LIBS is the output variable of the AX_BLAS macro,\n#   called automatically. FLIBS is the output variable of the\n#   AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is\n#   sometimes necessary in order to link with F77 libraries. Users will also\n#   need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same\n#   reason.\n#\n#   The user may also use --with-lapack=<lib> in order to use some specific\n#   LAPACK library <lib>. In order to link successfully, however, be aware\n#   that you will probably need to use the same Fortran compiler (which can\n#   be set via the F77 env. var.) as was used to compile the LAPACK and BLAS\n#   libraries.\n#\n#   ACTION-IF-FOUND is a list of shell commands to run if a LAPACK library\n#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it\n#   is not found. If ACTION-IF-FOUND is not specified, the default action\n#   will define HAVE_LAPACK.\n#\n# LICENSE\n#\n#   Copyright (c) 2009 Steven G. 
Johnson <stevenj@alum.mit.edu>\n#\n#   This program is free software: you can redistribute it and/or modify it\n#   under the terms of the GNU General Public License as published by the\n#   Free Software Foundation, either version 3 of the License, or (at your\n#   option) any later version.\n#\n#   This program is distributed in the hope that it will be useful, but\n#   WITHOUT ANY WARRANTY; without even the implied warranty of\n#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General\n#   Public License for more details.\n#\n#   You should have received a copy of the GNU General Public License along\n#   with this program. If not, see <http://www.gnu.org/licenses/>.\n#\n#   As a special exception, the respective Autoconf Macro's copyright owner\n#   gives unlimited permission to copy, distribute and modify the configure\n#   scripts that are the output of Autoconf when processing the Macro. You\n#   need not follow the terms of the GNU General Public License when using\n#   or distributing such scripts, even though portions of the text of the\n#   Macro appear in them. The GNU General Public License (GPL) does govern\n#   all other use of the material that constitutes the Autoconf Macro.\n#\n#   This special exception to the GPL applies to versions of the Autoconf\n#   Macro released by the Autoconf Archive. 
When you make and distribute a\n#   modified version of the Autoconf Macro, you may extend this special\n#   exception to the GPL to apply to your modified version as well.\n\n#serial 7\n\nAU_ALIAS([ACX_LAPACK], [AX_LAPACK])\nAC_DEFUN([AX_LAPACK], [\nAC_REQUIRE([AX_BLAS])\nax_lapack_ok=no\n\nAC_ARG_WITH(lapack,\n        [AS_HELP_STRING([--with-lapack=<lib>], [use LAPACK library <lib>])])\ncase $with_lapack in\n        yes | \"\") ;;\n        no) ax_lapack_ok=disable ;;\n        -* | */* | *.a | *.so | *.so.* | *.o) LAPACK_LIBS=\"$with_lapack\" ;;\n        *) LAPACK_LIBS=\"-l$with_lapack\" ;;\nesac\n\n# Get fortran linker name of LAPACK function to check for.\nAC_F77_FUNC(cheev)\n\n# We cannot use LAPACK if BLAS is not found\nif test \"x$ax_blas_ok\" != xyes; then\n        ax_lapack_ok=noblas\n        LAPACK_LIBS=\"\"\nfi\n\n# First, check LAPACK_LIBS environment variable\nif test \"x$LAPACK_LIBS\" != x; then\n        save_LIBS=\"$LIBS\"; LIBS=\"$LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS\"\n        AC_MSG_CHECKING([for $cheev in $LAPACK_LIBS])\n        AC_TRY_LINK_FUNC($cheev, [ax_lapack_ok=yes], [LAPACK_LIBS=\"\"])\n        AC_MSG_RESULT($ax_lapack_ok)\n        LIBS=\"$save_LIBS\"\n        if test $ax_lapack_ok = no; then\n                LAPACK_LIBS=\"\"\n        fi\nfi\n\n# LAPACK linked to by default?  
(is sometimes included in BLAS lib)\nif test $ax_lapack_ok = no; then\n        save_LIBS=\"$LIBS\"; LIBS=\"$LIBS $BLAS_LIBS $FLIBS\"\n        AC_CHECK_FUNC($cheev, [ax_lapack_ok=yes])\n        LIBS=\"$save_LIBS\"\nfi\n\n# Generic LAPACK library?\nfor lapack in lapack lapack_rs6k; do\n        if test $ax_lapack_ok = no; then\n                save_LIBS=\"$LIBS\"; LIBS=\"$BLAS_LIBS $LIBS\"\n                AC_CHECK_LIB($lapack, $cheev,\n                    [ax_lapack_ok=yes; LAPACK_LIBS=\"-l$lapack\"], [], [$FLIBS])\n                LIBS=\"$save_LIBS\"\n        fi\ndone\n\nAC_SUBST(LAPACK_LIBS)\n\n# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:\nif test x\"$ax_lapack_ok\" = xyes; then\n        ifelse([$1],,AC_DEFINE(HAVE_LAPACK,1,[Define if you have LAPACK library.]),[$1])\n        :\nelse\n        ax_lapack_ok=no\n        $2\nfi\n])dnl AX_LAPACK\n"
  },
  {
    "path": "m4/ltsugar.m4",
    "content": "# ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-\n#\n# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.\n# Written by Gary V. Vaughan, 2004\n#\n# This file is free software; the Free Software Foundation gives\n# unlimited permission to copy and/or distribute it, with or without\n# modifications, as long as this notice is preserved.\n\n# serial 6 ltsugar.m4\n\n# This is to help aclocal find these macros, as it can't see m4_define.\nAC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])\n\n\n# lt_join(SEP, ARG1, [ARG2...])\n# -----------------------------\n# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their\n# associated separator.\n# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier\n# versions in m4sugar had bugs.\nm4_define([lt_join],\n[m4_if([$#], [1], [],\n       [$#], [2], [[$2]],\n       [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])\nm4_define([_lt_join],\n[m4_if([$#$2], [2], [],\n       [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])\n\n\n# lt_car(LIST)\n# lt_cdr(LIST)\n# ------------\n# Manipulate m4 lists.\n# These macros are necessary as long as will still need to support\n# Autoconf-2.59 which quotes differently.\nm4_define([lt_car], [[$1]])\nm4_define([lt_cdr],\n[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],\n       [$#], 1, [],\n       [m4_dquote(m4_shift($@))])])\nm4_define([lt_unquote], $1)\n\n\n# lt_append(MACRO-NAME, STRING, [SEPARATOR])\n# ------------------------------------------\n# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.\n# Note that neither SEPARATOR nor STRING are expanded; they are appended\n# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).\n# No SEPARATOR is output if MACRO-NAME was previously undefined (different\n# than defined and empty).\n#\n# This macro is needed until we can rely on Autoconf 2.62, since earlier\n# versions of 
m4sugar mistakenly expanded SEPARATOR but not STRING.\nm4_define([lt_append],\n[m4_define([$1],\n\t   m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])\n\n\n\n# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])\n# ----------------------------------------------------------\n# Produce a SEP delimited list of all paired combinations of elements of\n# PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list\n# has the form PREFIXmINFIXSUFFIXn.\n# Needed until we can rely on m4_combine added in Autoconf 2.62.\nm4_define([lt_combine],\n[m4_if(m4_eval([$# > 3]), [1],\n       [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl\n[[m4_foreach([_Lt_prefix], [$2],\n\t     [m4_foreach([_Lt_suffix],\n\t\t]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,\n\t[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])\n\n\n# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])\n# -----------------------------------------------------------------------\n# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited\n# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.\nm4_define([lt_if_append_uniq],\n[m4_ifdef([$1],\n\t  [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],\n\t\t [lt_append([$1], [$2], [$3])$4],\n\t\t [$5])],\n\t  [lt_append([$1], [$2], [$3])$4])])\n\n\n# lt_dict_add(DICT, KEY, VALUE)\n# -----------------------------\nm4_define([lt_dict_add],\n[m4_define([$1($2)], [$3])])\n\n\n# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)\n# --------------------------------------------\nm4_define([lt_dict_add_subkey],\n[m4_define([$1($2:$3)], [$4])])\n\n\n# lt_dict_fetch(DICT, KEY, [SUBKEY])\n# ----------------------------------\nm4_define([lt_dict_fetch],\n[m4_ifval([$3],\n\tm4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),\n    m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])\n\n\n# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])\n# 
-----------------------------------------------------------------\nm4_define([lt_if_dict_fetch],\n[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],\n\t[$5],\n    [$6])])\n\n\n# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])\n# --------------------------------------------------------------\nm4_define([lt_dict_filter],\n[m4_if([$5], [], [],\n  [lt_join(m4_quote(m4_default([$4], [[, ]])),\n           lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),\n\t\t      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl\n])\n"
  },
  {
    "path": "m4/lt~obsolete.m4",
    "content": "# lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-\n#\n#   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.\n#   Written by Scott James Remnant, 2004.\n#\n# This file is free software; the Free Software Foundation gives\n# unlimited permission to copy and/or distribute it, with or without\n# modifications, as long as this notice is preserved.\n\n# serial 5 lt~obsolete.m4\n\n# These exist entirely to fool aclocal when bootstrapping libtool.\n#\n# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)\n# which have later been changed to m4_define as they aren't part of the\n# exported API, or moved to Autoconf or Automake where they belong.\n#\n# The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN\n# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us\n# using a macro with the same name in our local m4/libtool.m4 it'll\n# pull the old libtool.m4 in (it doesn't see our shiny new m4_define\n# and doesn't know about Autoconf macros at all.)\n#\n# So we provide this file, which has a silly filename so it's always\n# included after everything else.  This provides aclocal with the\n# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything\n# because those macros already exist, or will be overwritten later.\n# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 
\n#\n# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.\n# Yes, that means every name once taken will need to remain here until\n# we give up compatibility with versions before 1.7, at which point\n# we need to keep only those names which we still refer to.\n\n# This is to help aclocal find these macros, as it can't see m4_define.\nAC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])\n\nm4_ifndef([AC_LIBTOOL_LINKER_OPTION],\t[AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])\nm4_ifndef([AC_PROG_EGREP],\t\t[AC_DEFUN([AC_PROG_EGREP])])\nm4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH],\t[AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])\nm4_ifndef([_LT_AC_SHELL_INIT],\t\t[AC_DEFUN([_LT_AC_SHELL_INIT])])\nm4_ifndef([_LT_AC_SYS_LIBPATH_AIX],\t[AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])\nm4_ifndef([_LT_PROG_LTMAIN],\t\t[AC_DEFUN([_LT_PROG_LTMAIN])])\nm4_ifndef([_LT_AC_TAGVAR],\t\t[AC_DEFUN([_LT_AC_TAGVAR])])\nm4_ifndef([AC_LTDL_ENABLE_INSTALL],\t[AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])\nm4_ifndef([AC_LTDL_PREOPEN],\t\t[AC_DEFUN([AC_LTDL_PREOPEN])])\nm4_ifndef([_LT_AC_SYS_COMPILER],\t[AC_DEFUN([_LT_AC_SYS_COMPILER])])\nm4_ifndef([_LT_AC_LOCK],\t\t[AC_DEFUN([_LT_AC_LOCK])])\nm4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE],\t[AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])\nm4_ifndef([_LT_AC_TRY_DLOPEN_SELF],\t[AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])\nm4_ifndef([AC_LIBTOOL_PROG_CC_C_O],\t[AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])\nm4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])\nm4_ifndef([AC_LIBTOOL_OBJDIR],\t\t[AC_DEFUN([AC_LIBTOOL_OBJDIR])])\nm4_ifndef([AC_LTDL_OBJDIR],\t\t[AC_DEFUN([AC_LTDL_OBJDIR])])\nm4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], 
[AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])\nm4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP],\t[AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])\nm4_ifndef([AC_PATH_MAGIC],\t\t[AC_DEFUN([AC_PATH_MAGIC])])\nm4_ifndef([AC_PROG_LD_GNU],\t\t[AC_DEFUN([AC_PROG_LD_GNU])])\nm4_ifndef([AC_PROG_LD_RELOAD_FLAG],\t[AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])\nm4_ifndef([AC_DEPLIBS_CHECK_METHOD],\t[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])\nm4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])\nm4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])\nm4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])\nm4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS],\t[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])\nm4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP],\t[AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])\nm4_ifndef([LT_AC_PROG_EGREP],\t\t[AC_DEFUN([LT_AC_PROG_EGREP])])\nm4_ifndef([LT_AC_PROG_SED],\t\t[AC_DEFUN([LT_AC_PROG_SED])])\nm4_ifndef([_LT_CC_BASENAME],\t\t[AC_DEFUN([_LT_CC_BASENAME])])\nm4_ifndef([_LT_COMPILER_BOILERPLATE],\t[AC_DEFUN([_LT_COMPILER_BOILERPLATE])])\nm4_ifndef([_LT_LINKER_BOILERPLATE],\t[AC_DEFUN([_LT_LINKER_BOILERPLATE])])\nm4_ifndef([_AC_PROG_LIBTOOL],\t\t[AC_DEFUN([_AC_PROG_LIBTOOL])])\nm4_ifndef([AC_LIBTOOL_SETUP],\t\t[AC_DEFUN([AC_LIBTOOL_SETUP])])\nm4_ifndef([_LT_AC_CHECK_DLFCN],\t\t[AC_DEFUN([_LT_AC_CHECK_DLFCN])])\nm4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER],\t[AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])\nm4_ifndef([_LT_AC_TAGCONFIG],\t\t[AC_DEFUN([_LT_AC_TAGCONFIG])])\nm4_ifndef([AC_DISABLE_FAST_INSTALL],\t[AC_DEFUN([AC_DISABLE_FAST_INSTALL])])\nm4_ifndef([_LT_AC_LANG_CXX],\t\t[AC_DEFUN([_LT_AC_LANG_CXX])])\nm4_ifndef([_LT_AC_LANG_F77],\t\t[AC_DEFUN([_LT_AC_LANG_F77])])\nm4_ifndef([_LT_AC_LANG_GCJ],\t\t[AC_DEFUN([_LT_AC_LANG_GCJ])])\nm4_ifndef([AC_LIBTOOL_LANG_C_CONFIG],\t[AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])\nm4_ifndef([_LT_AC_LANG_C_CONFIG],\t[AC_DEFUN([_LT_AC_LANG_C_CONFIG])])\nm4_ifndef([AC_LIBTOOL
_LANG_CXX_CONFIG],\t[AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])\nm4_ifndef([_LT_AC_LANG_CXX_CONFIG],\t[AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])\nm4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG],\t[AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])\nm4_ifndef([_LT_AC_LANG_F77_CONFIG],\t[AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])\nm4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG],\t[AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])\nm4_ifndef([_LT_AC_LANG_GCJ_CONFIG],\t[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])\nm4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG],\t[AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])\nm4_ifndef([_LT_AC_LANG_RC_CONFIG],\t[AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])\nm4_ifndef([AC_LIBTOOL_CONFIG],\t\t[AC_DEFUN([AC_LIBTOOL_CONFIG])])\nm4_ifndef([_LT_AC_FILE_LTDLL_C],\t[AC_DEFUN([_LT_AC_FILE_LTDLL_C])])\nm4_ifndef([_LT_REQUIRED_DARWIN_CHECKS],\t[AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])\nm4_ifndef([_LT_AC_PROG_CXXCPP],\t\t[AC_DEFUN([_LT_AC_PROG_CXXCPP])])\nm4_ifndef([_LT_PREPARE_SED_QUOTE_VARS],\t[AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])\nm4_ifndef([_LT_PROG_ECHO_BACKSLASH],\t[AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])\nm4_ifndef([_LT_PROG_F77],\t\t[AC_DEFUN([_LT_PROG_F77])])\nm4_ifndef([_LT_PROG_FC],\t\t[AC_DEFUN([_LT_PROG_FC])])\nm4_ifndef([_LT_PROG_CXX],\t\t[AC_DEFUN([_LT_PROG_CXX])])\n"
  },
  {
    "path": "makefile_include.in",
    "content": "# Template for configure to create makefile_include, which is included\n# by test/Makefile and benchmark/Makefile\n\nAR = @AR@\nCC = @CC@\nCFLAGS = @CFLAGS@\nCPP = @CPP@\nCPPFLAGS = @CPPFLAGS@\nCXX = @CXX@\nCXXCPP = @CXXCPP@\nCXXFLAGS = @CXXFLAGS@ @OPENMP_CXXFLAGS@\nDEFS = @DEFS@\nLD = @LD@\nLDFLAGS = @LDFLAGS@ @OPENMP_CXXFLAGS@\nLIBOBJS = @LIBOBJS@\nLIBS = @LIBS@\nSHELL = @SHELL@\nLIBTOOL = @LIBTOOL@\nUSE_GSL = @USE_GSL@\n"
  },
  {
    "path": "test/Makefile",
    "content": "# Makefile for example programs that demonstrate different features of\n# the Adept library\n#\n# Note that this Makefile is hand-coded rather than being generated by\n# automake\n#\n# The -DADEPT_RECORDING_PAUSABLE option enables the pause_recording\n# and continue_recording functionality and is used by test_adept,\n# although it will run correctly (but slightly more slowly) without\n# this flag\n\n# The configure script writes the following file, which contains\n# variables controlling the compilation\ninclude ../makefile_include\n\n# Uncomment the following to check what happens if thread safety\n# disabled\n# ADEPT_FLAGS = -DADEPT_STACK_THREAD_UNSAFE\n\n# The objects to create\nOBJECTS = algorithm.o algorithm_noad.o test_checkpoint.o \\\n\ttest_adept.o test_adept_with_and_without_ad.o \\\n\ttest_radiances.o simulate_radiances.o test_thread_safe.o \\\n\ttest_no_lib.o test_misc.o test_arrays.o test_arrays_active.o \\\n\ttest_array_speed.o test_arrays_active_pausable.o \\\n\ttest_fixed_arrays_active.o test_radiances_array.o \\\n\ttest_fixed_arrays.o test_constructors.o test_derivatives.o \\\n\ttest_array_derivatives.o test_thread_safe_arrays.o \\\n\ttest_complex_arrays.o test_packet_operations.o \\\n\ttest_fastexp.o test_reduce_active.o test_minimizer.o \\\n\ttest_interp.o\n\nGSL_OBJECTS = test_gsl_interface.o state.o rosenbrock_banana_function.o\n\nGSL_LIBS = -lgsl\n\nCOMPILE_FLAGS = $(CXXFLAGS) -I../include $(CPPFLAGS) $(ADEPT_FLAGS)\n\n# Because we aren't going to install the test programs, and we want\n# them to work even if Adept is not installed, it is easiest to use\n# libtool to create statically-linked executables\ntop_builddir = ..\nCXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) \\\n\t-static -no-install -L../adept/.libs $(LDFLAGS) -ladept -o $@\n\n# Link without the Adept library\nCXXLINK_NOLIB = $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) \\\n\t$(LDFLAGS) -o $@\n\n# Dependency on the presence of the Adept static 
library\nLIBADEPT = ../adept/.libs/libadept.a\n\nMYLIBS = $(LIBS)\n\nPROGRAMS = test_adept test_adept_with_and_without_ad test_radiances \\\n\ttest_gsl_interface test_misc test_checkpoint test_thread_safe \\\n\ttest_array_speed test_no_lib test_radiances_array test_constructors \\\n\ttest_arrays test_arrays_active test_arrays_active_pausable \\\n\ttest_fixed_arrays test_fixed_arrays_active test_derivatives \\\n\ttest_array_derivatives test_thread_safe_arrays test_complex_arrays \\\n\ttest_packet_operations test_fastexp test_reduce_active test_minimizer \\\n\ttest_interp\n\nall:\n\t@echo \"********************************************************\"\n\t@echo \"*** To compile test programs in test/ and benchmark/ ***\"\n\t@echo \"*** type \\\"make check\\\"                                ***\"\n\t@echo \"********************************************************\"\n\n# Compile all the test programs\ncheck: $(PROGRAMS) run-tests\n\nrun-tests:\n\t./run_tests.sh $(PROGRAMS)\n\n# Test program 1\ntest_adept: algorithm.o test_adept.o $(LIBADEPT)\n\t$(CXXLINK) algorithm.o test_adept.o $(MYLIBS)\n\n# Test program 2\ntest_adept_with_and_without_ad: algorithm.o algorithm_noad.o test_adept_with_and_without_ad.o $(LIBADEPT)\n\t$(CXXLINK) algorithm.o algorithm_noad.o test_adept_with_and_without_ad.o $(MYLIBS)\n\n# Test program 3\ntest_radiances: simulate_radiances.o test_radiances.o $(LIBADEPT)\n\t$(CXXLINK) simulate_radiances.o test_radiances.o $(MYLIBS)\n\nifeq \"X$(USE_GSL)\" \"Xyes\"\n# Test program 4\ntest_gsl_interface: $(GSL_OBJECTS) $(LIBADEPT)\n\t$(CXXLINK) $(GSL_OBJECTS) $(GSL_LIBS) $(MYLIBS)\nelse\ntest_gsl_interface:\n\t@echo \"The executable test_gsl_interface will not be created because GSL library was not found\"\nendif\n\n# Test program 5\ntest_misc: test_misc.o algorithm.o $(LIBADEPT)\n\t$(CXXLINK) test_misc.o algorithm.o $(MYLIBS)\n\n# Test program 6\ntest_checkpoint: test_checkpoint.o $(LIBADEPT)\n\t$(CXXLINK) test_checkpoint.o $(MYLIBS)\n\n# Test program 
7\ntest_thread_safe: test_thread_safe.o $(LIBADEPT)\n\t$(CXXLINK) test_thread_safe.o $(MYLIBS)\n\n# Test program 8 (note that it is not linked against the Adept library)\ntest_no_lib: test_no_lib.o algorithm.o\n\t$(CXXLINK_NOLIB) test_no_lib.o algorithm.o $(MYLIBS)\n\n# Test program 9a\ntest_arrays: test_arrays.o $(LIBADEPT)\n\t$(CXXLINK) test_arrays.o $(MYLIBS)\n\n# Test program 9b\ntest_arrays_active: test_arrays_active.o $(LIBADEPT)\n\t$(CXXLINK) test_arrays_active.o $(MYLIBS)\n\n# Test program 9c\ntest_arrays_active_pausable: test_arrays_active_pausable.o $(LIBADEPT)\n\t$(CXXLINK) test_arrays_active_pausable.o $(MYLIBS)\n\n# Test program 9d\ntest_complex_arrays: test_complex_arrays.o $(LIBADEPT)\n\t$(CXXLINK) test_complex_arrays.o $(MYLIBS)\n\n# Test program 10\ntest_array_speed: test_array_speed.o $(LIBADEPT)\n\t$(CXXLINK) test_array_speed.o $(MYLIBS)\n\n\n# Test program 11\ntest_radiances_array: simulate_radiances.o test_radiances_array.o $(LIBADEPT)\n\t$(CXXLINK) simulate_radiances.o test_radiances_array.o $(MYLIBS)\n\n# Test program 12a\ntest_fixed_arrays: test_fixed_arrays.o $(LIBADEPT)\n\t$(CXXLINK) test_fixed_arrays.o $(MYLIBS)\n\n# Test program 12b\ntest_fixed_arrays_active: test_fixed_arrays_active.o $(LIBADEPT)\n\t$(CXXLINK) test_fixed_arrays_active.o $(MYLIBS)\n\n# Test program 13\ntest_constructors: test_constructors.o $(LIBADEPT)\n\t$(CXXLINK) test_constructors.o $(MYLIBS)\n\n# Test program 14\ntest_derivatives: test_derivatives.o $(LIBADEPT)\n\t$(CXXLINK) test_derivatives.o $(MYLIBS)\n\n# Test program 15\ntest_array_derivatives: test_array_derivatives.o $(LIBADEPT)\n\t$(CXXLINK) test_array_derivatives.o $(MYLIBS)\n\n# Test program 16\ntest_thread_safe_arrays: test_thread_safe_arrays.o $(LIBADEPT)\n\t$(CXXLINK) test_thread_safe_arrays.o $(MYLIBS)\n\n# Test program 17\ntest_packet_operations: test_packet_operations.o $(LIBADEPT)\n\t$(CXXLINK) test_packet_operations.o $(MYLIBS)\n\n# Test program 18\ntest_fastexp: test_fastexp.o 
$(LIBADEPT)\n\t$(CXXLINK) test_fastexp.o $(MYLIBS)\n\n# Test program 19\ntest_reduce_active: test_reduce_active.o $(LIBADEPT)\n\t$(CXXLINK) test_reduce_active.o $(MYLIBS)\n\n# Test program 20\ntest_minimizer: test_minimizer.o $(LIBADEPT)\n\t$(CXXLINK) test_minimizer.o $(MYLIBS)\n\n# Test program 21\ntest_interp: test_interp.o $(LIBADEPT)\n\t$(CXXLINK) test_interp.o $(MYLIBS)\n\n# The no-automatic-differentiation version of the algorithm: uses the\n# -DADEPT_NO_AUTOMATIC_DIFFERENTIATION to produce a version of the\n# algorithm that takes double rather than adouble arguments\nalgorithm_noad.o: algorithm.cpp *.h ../include/adept.h\n\t$(CXX) $(COMPILE_FLAGS) $(INCLUDES) -c algorithm.cpp -DADEPT_NO_AUTOMATIC_DIFFERENTIATION -o $@\n\n# All other object files created by compiling the corresponding source\n# file without this flag\n%.o: %.cpp *.h ../include/*.h ../include/adept/*.h\n\t$(CXX) $(COMPILE_FLAGS) $(INCLUDES) -c $<\n\ntest_arrays_active.o: test_arrays.cpp  *.h ../include/*.h ../include/adept/*.h\n\t$(CXX) $(COMPILE_FLAGS) -DALL_ACTIVE $(INCLUDES) -c test_arrays.cpp -o test_arrays_active.o\n\ntest_arrays_active_pausable.o: test_arrays.cpp  *.h ../include/*.h ../include/adept/*.h\n\t$(CXX) $(COMPILE_FLAGS) -DADEPT_RECORDING_PAUSABLE -DALL_ACTIVE $(INCLUDES) -c test_arrays.cpp -o test_arrays_active_pausable.o\n\ntest_complex_arrays.o: test_arrays.cpp  *.h ../include/*.h ../include/adept/*.h\n\t$(CXX) $(COMPILE_FLAGS) -DALL_COMPLEX $(INCLUDES) -c test_arrays.cpp -o test_complex_arrays.o\n\ntest_fixed_arrays_active.o: test_fixed_arrays.cpp  *.h ../include/*.h ../include/adept/*.h\n\t$(CXX) $(COMPILE_FLAGS) -DALL_ACTIVE $(INCLUDES) -c test_fixed_arrays.cpp -o test_fixed_arrays_active.o\n\n# Remove all object files and executables\nclean:\n\trm -f $(OBJECTS) $(GSL_OBJECTS) $(PROGRAMS) test_stderr.txt test_results.txt\n\nmostlyclean: clean\n\n# Null targets to satisfy autotools\nEMPTY_AUTOMAKE_TARGETS = distdir install install-data install-exec uninstall 
\\\n\tinstall-dvi install-html install-info install-ps install-pdf \\\n\tinstalldirs installcheck distclean maintainer-clean \\\n\tdvi pdf ps info html tags ctags\n.PHONY: $(EMPTY_AUTOMAKE_TARGETS)\n$(EMPTY_AUTOMAKE_TARGETS):\n"
  },
  {
    "path": "test/README",
    "content": "This directory contains examples to demonstrate various features of\nAdept. Type \"make check\" from the directory above to compile\nthem. \n\nNote that unlike in the rest of this package, the Makefile in this\ndirectory was not generated by automake; it is well commented and so\nmay assist in understanding how to build software that uses Adept.\n\n\nTEST 1: BASIC FEATURES\n\nExecutable: test_adept\n\nSource files: test_adept.cpp, algorithm.cpp, algorithm.h\n\nDemonstrates: basic use of Adept, reverse-mode automatic\ndifferentiation, computing the Jacobian matrix, printing diagnostic\ninformation, verifying results by comparing to numerical calculations,\npausing and continuing recordings\n\nSynopsis: This program demonstrates how to differentiate a simple\nfunction (in algorithm.cpp), comparing the results from automatic\ndifferentiation with numerical differentiation. The function used is\nthe contrived example from the Adept paper.\n\n\nTEST 2: COMPILING SOURCE FILES TWICE, WITH AND WITHOUT AUTOMATIC\nDIFFERENTIATION\n\nExecutable: test_adept_with_and_without_ad\n\nSource files: test_adept_with_and_without_ad.cpp, algorithm.cpp,\nalgorithm.h, algorithm_with_and_without_ad.h\n\nDemonstrates: most of the same features as TEST_ADEPT, plus compiling\na source file twice\n\nSynopsis: This program is the same as in Test 1, except that\nalgorithm.cpp is compiled twice, once with automatic differentiation\n(producing the object file algorithm.o) and once without (producing\nthe object file algorithm_noad.o). This is achieved in the Makefile\nusing the -DADEPT_NO_AUTOMATIC_DIFFERENTIATION flag. This provides two\noverloaded versions of the \"algorithm\" function, one that takes active\n\"adouble\" arguments, and the other that takes inactive \"double\"\narguments. 
The two versions are declared in the\nalgorithm_with_and_without_ad.h header file.\n\n\nTEST 3: RADIANCE SIMULATION\n\nExecutable: test_radiances\n\nSource files: test_radiances.cpp, simulate_radiances.cpp,\nsimulate_radiances.h\n\nDemonstrates: activation and deactivation of an Adept stack, using\nmore than one Adept stack in the same program (but not at the same\ntime), how to interface Adept with software that computes its own\nJacobian\n\nSynopsis: The \"main\" function is in test_radiances.cpp, and\ndemonstrates how to interface Adept to an algorithm that does not have\nan Adept interface, but which provides its own Jacobian. The algorithm\nin this case is in simulate_radiances.cpp; while it does not have an\nAdept interface, it does use Adept internally to compute the Jacobian\nthat it returns. It therefore needs to temporarily deactivate the\ncalling function's Adept stack (where derivative information is\nstored) while using its own.  This example is from the Adept\ndocumentation.\n\n\nTEST 4: GSL MINIMIZATION INTERFACE\n\nExecutable: test_gsl_interface\n\nCommand-line arguments: optionally, the executable name can be\nfollowed by an integer (which should be 2 or greater) expressing the\nnumber of dimensions of the minimization problem.  The default is 2.\n\nSource files: test_gsl_interface.cpp, rosenbrock_banana_function.cpp,\nstate.cpp, state.h\n\nPre-requisites: the GNU Scientific Library should be installed; on an\nRPM-based system you want the \"gsl\" and \"gsl-devel\" packages. If this\nis not available at the time the configure script is run, this\nexecutable will not be built.\n\nDemonstrates: interface with the multi-dimensional minimization\ncapability of the GNU Scientific Library, use of Adept to minimize a\nreal function, an object-oriented way to store Adept data for a\nminimization problem\n\nSynopsis: The \"main\" function is in test_gsl_interface.cpp and is\nfairly self-explanatory. 
The state.cpp and state.h files show how\nAdept data can be stored and accessed in an object-oriented way. The\nfunction to be minimized is the N-dimensional Rosenbrock banana\nfunction, given in rosenbrock_banana_function.cpp.\n\n\nTEST 5: TRIVIAL EXAMPLE IN ADEPT PAPER\n\nExecutable: test_misc\n\nSource files: test_misc.cpp, algorithm.cpp, algorithm.h\n\nDemonstrates: basic use of Adept, reverse-mode automatic\ndifferentiation\n\nSynopsis: This program is simply the trivial example in the Adept\npaper, using the same algorithm as in Test 1.\n\n\nTEST 6: CHECKPOINTING\n\nExecutable: test_checkpoint\n\nSource files: test_checkpoint.cpp\n\nDemonstrates: checkpointing\n\nSynopsis: Large algorithms, particularly those that involve\ntime-dependent simulations, can require a lot of memory when used with\nan automatic-differentiation tool. Even if enough memory is available,\nthe speed may be sub-optimal.  This program demonstrates the\ncheckpointing technique, where a simulation using the \"Toon\" algorithm\nin the Adept paper is first run with 10,000 timesteps, and then in 100\nblocks of 100 timesteps (the checkpointed simulation), with the output\nstored after each block so that the reverse pass of the automatic\ndifferentiation needs 100 times less memory. The resulting gradients\nare output to verify that the two versions produce the same results, and\nthe timings of the two are presented as well.\n\n\nTEST 7: THREAD SAFETY\n\nExecutable: test_thread_safe\n\nSource files: test_thread_safe.cpp\n\nDemonstrates: use of Adept in multi-threaded applications, thread\nsafety, comparison of Jacobian matrices computed using the forward and\nreverse methods\n\nSynopsis: This program computes the 128-128 Jacobian matrix of an\nalgorithm 16 times with different inputs.  
The Jacobian matrix is\nactually computed twice, once with 128 forward passes through the\nderivative statements and once with 128 reverse passes through the\nderivative statements, and a check is performed to see that the\nroot-mean-squared difference is within some tolerance.\n  The default behaviour (and if the \"-parallel\" command-line argument\nis provided) is to use OpenMP to run the 16 computations in parallel.\nIn this instance the 128 passes required to compute the Jacobian\nmatrices will be computed using just a single thread. If the \"-serial\"\ncommand-line argument is provided then the 16 computations are carried\nout in series.  In this instance, the Adept library is able to run the\nJacobian-matrix calculation in parallel (this behaviour is automatic\nif the program is compiled with the -fopenmp option).\n  If the program is compiled with the ADEPT_STACK_THREAD_UNSAFE\npreprocessor variable defined, or on platforms that don't support\nthread-local variables (e.g. some Mac platforms), then the program\nshould abort in the \"-parallel\" case ONLY.\n\n\n\nTEST 8: COMPILING WITHOUT EXTERNAL ADEPT LIBRARY\n\nExecutable: test_no_lib\n\nSource files: test_no_lib.cpp algorithm.cpp algorithm.h\n\nDemonstrates: use of adept_source.h to create an executable without\nthe need to the external Adept library\n\nSynopsis: This is basically the same as test_misc.cpp, but one of the\nsource files includes adept_source.h (rather than adept.h), which\ncontains the source code for the Adept library. This means that no\nlinking to an external Adept library (via -ladept) is required. 
This\ncapability makes it easier to distribute a package that can be used on\nthe widest range of operating systems, particularly those like\nMicrosoft Windows that cannot natively run the configure shell script.\n\n\n\nTEST 9a,b,c,d: ARRAY FUNCTIONALITY\n\nExecutables: (a) test_arrays, (b) test_arrays_active, (c)\ntest_arrays_active_pausable, (d) test_complex_arrays\n\nSource files: test_arrays.cpp\n\nDemonstrates: array functionality for (a) passive arrays, (b) active\narrays, (c) active arrays but with stack recording \"paused\", (d)\ncomplex arrays.\n\n\n\nTEST 10: ARRAY SPEED\n\nExecutable: test_array_speed\n\nSource files: test_array_speed.cpp\n\nDemonstrates: speed of arrays versus for loops\n\n\n\nTEST 11: RADIANCE SIMULATION WITH ARRAYS\n\nExecutable: test_radiances_array\n\nSource files: test_radiances_array.cpp, simulate_radiances.cpp,\nsimulate_radiances.h\n\nDemonstrates: use of arrays with add/append_derivative_dependence\n\n\n\nTEST 12a,b: FIXED-ARRAY FUNCTIONALITY\n\nExecutables: (a) test_fixed_arrays, (b) test_fixed_arrays_active\n\nSource file: test_fixed_arrays.cpp\n\nDemonstrates: functionality of fixed arrays, i.e. 
those whose\ndimensions are set at compile time: (a) passive version, and (b)\nactive version.\n\n\n\nTEST 13: ARRAY CONSTRUCTORS\n\nExecutable: test_constructors\n\nSource file: test_constructors.cpp\n\nDemonstrates: different ways of constructing, assigning and linking\narrays, and passing them to and from functions.\n\n\n\nTEST 14: DERIVATIVES\n\nExecutable: test_derivatives\n\nSource file: test_derivatives.cpp\n\nDemonstrates: validity of the automatic differentiation of all\nmathematical functions supported by Adept, via finite differencing.\n\n\n\nTEST 15: ARRAY DERIVATIVES\n\nExecutable: test_array_derivatives\n\nSource file: test_array_derivatives.cpp\n\nDemonstrates: validity of the automatic differentiation of selected\narray operations, on both Array types and FixedArray types.\n\n\n\nTEST 16: THREAD-SAFE ARRAYS\n\nExecutable: test_thread_safe_arrays\n\nSource file: test_thread_safe_arrays.cpp\n\nDemonstrates: two ways to make accessing arrays thread safe: use the\nsoft_link() member function of Array and SpecialMatrix, OR compile\nwith ADEPT_STORAGE_THREAD_SAFE (C++11 only).\n\n\n\nTEST 17: PACKET OPERATIONS\n\nExecutable: test_packet_operations\n\nSource file: test_packet_operations.cpp\n\nDemonstrates: Use of Intel or ARM intrinsics is mathematically\nconsistent regardless of whether code is compiled with SSE2, NEON,\nAVX2 or AVX512.  You will need to recompile with (e.g. 
for g++)\n-msse2, -mavx2 or -mavx512f (or simply march=native to use the best\ninstruction set available) and check that the output is the same each\ntime.\n\n\nTEST 18: FAST EXPONENTIAL OPERATIONS\n\nExecutable: test_fastexp\n\nSource file: test_fastexp.cpp\n\nDemonstrates: Correctness of Adept's fast exponential function.\n\n\nTEST 19: ACTIVE REDUCE OPERATIONS\n\nExecutable: test_reduce_active\n\nSource file: test_reduce_active.cpp\n\nDemonstrates: differentiation of reduction operations (sum, product,\nmaxval etc).\n\n\nTEST 20: MINIMIZER\n\nExecutable: test_minimizer\n\nSource file: test_minimizer.cpp\n\nDemonstrates: Adept's various minimization algorithms on the\nN-dimensional Rosenbrock banana function, where the (optional)\narguments are:\n 1. number of dimensions, default 2\n 2. minimization algorithm string, default \"Levenberg-Marquardt\" (also\n available: Levenberg, L-BFGS, Conjugate-Gradient,\n Conjugate-Gradient-FR; additionally, the \"Newton-Levenberg-Marquardt\"\n and \"Newton-Levenberg\" will use the exact Hessian, rather than an\n approximation\n 3. maximum number of iterations, default 100\n 4. gradient-norm to indicate convergence, default 0.1\n\nThe cost function value and gradient norm are reported to standard\noutput each iteration. To standard error is written a table of\nnumbers, one line per call to the function being minimized.  The first\non each line is the number of the sub-iteration, usually the number of\nthe call to the line-search algorithm, starting at 0. Then follows the\nN values of the state vector, followed by the value of the cost\nfunction. This can be used to plot how each minimizer progresses to\nthe solution.\n\n\nTEST 21: INTERPOLATION\n\nExecutable: test_interp\n\nSource file: test_interp.cpp\n\nDemonstrates: Adept's interpolation functions interp, interp2d and\ninterp3d."
  },
  {
    "path": "test/algorithm.cpp",
    "content": "/* algorithm.cpp - A simple demonstration algorithm used in Tests 1 & 2 \n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n*/\n\n\n#include <cmath>\n\n#include \"algorithm.h\"\nusing adept::adouble;\n\n// A simple demonstration algorithm used in the Adept paper. Note that\n// this algorithm can be compiled with\n// -DADEPT_NO_AUTOMATIC_DIFFERENTIATION to create a version that takes\n// double arguments and returns a double result.\nadouble algorithm(const adouble x[2]) {\n  adouble y = 4.0;\n  adouble s = 2.0*x[0] + 3.0*x[1]*x[1];\n  double b=3.0;\n  y = s + b;\n  y *= sin(s);\n  return y;\n}\n \n"
  },
  {
    "path": "test/algorithm.h",
    "content": "/* algorithm.h - Header file for the simple example algorithm function\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#ifndef ALGORITHM_H\n#define ALGORITHM_H 1\n\n// This header file defining the interface of the simple demonstration\n// function \"algorithm\".  This header file is included by both\n// algorithm.cpp, which defines the body of the function, and\n// test_adept.cpp, which calls algorithm. \n\n#include \"adept.h\"\n\n// Declare the function\nadept::adouble algorithm(const adept::adouble x[2]);\n\n#endif\n"
  },
  {
    "path": "test/algorithm_with_and_without_ad.h",
    "content": "/* algorithm_with_and_without_ad.h\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// This header file defining the interface of the simple demonstration\n// function \"algorithm\", and is included by\n// test_adept_with_and_without_ad.cpp. It demonstrates the use of a\n// single source file that is compiled twice to produce two overloaded\n// versions of a function. The \"original\" version takes\n// double-precision arguments and returns a double-precision answer,\n// while the automatic differentiation version takes adouble arguments\n// and returns an adouble answer. The two versions are compiled from\n// the same source file algorithm.cpp by compiling it twice with and\n// without the compiler option -DAUTOMATIC_DIFFERENTIATION.\n\n\n#ifndef ALGORITHM_WITH_AND_WITHOUT_AD_H\n#define ALGORITHM_WITH_AND_WITHOUT_AD_H 1\n\n#include \"adept.h\"\n\n// Declare the original version of the function\ndouble algorithm(const double x[2]);\n\n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n// Declare the automatic-differentiation version of the function\nadept::adouble algorithm(const adept::adouble x[2]);\n#endif\n\n#endif\n"
  },
  {
    "path": "test/rosenbrock_banana_function.cpp",
    "content": "/* rosenbrock_banana_function.cpp - N-dimensional Rosenbrock function\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// This function is an N-dimensional extension of Rosenbrock's banana\n// function; it is actually the \"2nd De Jong function\" - see the\n// Wikipedia entry for Rosenbrock's function.\n\n#include \"state.h\"\n\nusing adept::adouble;\nadouble State::calc_function_value(const adouble* x) {\n  adouble sum = 0.0;\n  for (unsigned int i = 0; i < nx()-1; i++) {\n    adouble a = x[i+1]-x[i]*x[i];\n    sum += (1.0-x[i])*(1.0-x[i]) + 100.0*a*a;\n  }\n  return sum;\n}\n"
  },
  {
    "path": "test/run_tests.sh",
    "content": "#!/bin/sh\n\n# Simple script to run all programs provided to it and report whether\n# they succeed or fail\n\nLOG=test_results.txt\nSTDERR=test_stderr.txt\n\nrm -f $LOG\ntouch $LOG\n\necho\necho \"Writing output of test programs to $LOG\"\necho\n\nFAILURES=0\n\nfor TEST in \"$@\"\ndo\n    if [ -x \"$TEST\" ]\n    then\n\trm -f $STDERR\n\techo >> $LOG\n\techo \"########################################################\" >> $LOG\n\techo \"### $TEST\" >> $LOG\n\techo \"########################################################\" >> $LOG\n\techo >> $LOG\n\t# The built-in version of \"echo\" on some versions of \"sh\" does\n\t# not treat the \"-n\" option correctly, so we use /bin/echo\n\t# here\n\t/bin/echo -n \"$TEST... \"\n\t./$TEST >> $LOG 2> $STDERR\n\tif [ \"$?\" = 0 ]\n\tthen\n\t    echo \"PASSED\"\n\telse\n\t    echo \"*** FAILED ***\"\n\t    cat $STDERR\n\t    FAILURES=`expr $FAILURES + 1`\n\tfi\n    else\n\techo \"$TEST does not exist\"\n    fi\ndone\n\necho\nif [ \"$FAILURES\" -gt \"0\" ]\nthen\n    echo \"$FAILURES programs failed in some way - see detailed output in $LOG\"\nelse\n    echo \"All test programs ran successfully\"\nfi\necho\n\nexit $FAILURES\n"
  },
  {
    "path": "test/simulate_radiances.cpp",
    "content": "/* simulate_radiances.cpp - provides a function taking inactive arguments that returns also Jacobian matrices\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include \"adept.h\"\n#include \"simulate_radiances.h\"\n\nusing adept::aReal;\nusing adept::Real;\n\n// Simulate a single radiance (W sr-1 m-3) given the wavelength (m),\n// emissivity profile, surface temperature (K) and temperature profile\n// (K), where the profile data are located at n points with spacing\n// 1000 m. This function uses active arguments. It is accessible only\n// from within this file; the public interface is the\n// simulate_radiance function.\nstatic\naReal\nsimulate_radiance_private(int n,\n\t\t\t  Real wavelength,\n\t\t\t  const Real* emissivity,\n\t\t\t  const aReal& surface_temperature,\n\t\t\t  const aReal* temperature)\n{\n  static const Real BOLTZMANN_CONSTANT = 1.380648813e-23;\n  static const Real SPEED_OF_LIGHT = 299792458.0;\n\n  int i;\n  aReal bt = surface_temperature; // Brightness temperature in K\n  // Loop up through the atmosphere working out the contribution from\n  // each layer\n  for (i = 0; i < n; i++) {\n    bt = bt*(1.0-emissivity[i]) + emissivity[i]*temperature[i];\n  }\n  // Convert from brightness temperature to radiance using\n  // Rayleigh-Jeans approximation\n  return 2.0*SPEED_OF_LIGHT*BOLTZMANN_CONSTANT*bt\n    /(wavelength*wavelength*wavelength*wavelength);\n}\n\n// Simulate two radiances (W sr-1 m-3) given the surface temperature\n// (K) and temperature profile (K), where the profile data are located\n// at n points with spacing 1000 m. 
This function uses inactive\n// arguments.\nvoid\nsimulate_radiances(int n, // Size of temperature array\n\t\t   // Input variables:\n\t\t   Real surface_temperature, \n\t\t   const Real* temperature,\n\t\t   // Output variables:\n\t\t   Real radiance[2],\n\t\t   // Output Jacobians:\n\t\t   Real dradiance_dsurface_temperature[2],\n\t\t   Real* dradiance_dtemperature)\n{\n  // First temporarily deactivate any existing Adept stack used by the\n  // calling function\n  adept::Stack* caller_stack = adept::active_stack();\n  if (caller_stack != 0) {\n    caller_stack->deactivate();\n  }\n\n  // Within the scope of these curly brackets, another Adept stack\n  // will be used\n  {\n    // Fictitious oxygen channels around 60 GHz: wavelength in m\n    static const Real wavelength[2] = {0.006, 0.0061}; \n    // Mass absorption coefficient of oxygen in m2 kg-1\n    static const Real mass_abs_coefft[2] = {3.0e-5, 3.0e-3};\n    // Layer thickness in m\n    static const Real dz = 1000.0;\n\n    // Density of oxygen in kg m-3\n    std::vector<Real> density_oxygen(n);\n    // Emissivity at a particular microwave wavelength\n    std::vector<Real> emissivity(n);\n\n    // Start a new stack\n    adept::Stack s;\n\n    // Create local active variables: surface temperature, temperature\n    // and radiance\n    aReal st = surface_temperature;\n    std::vector<aReal> t(n);\n    aReal r[2];\n\n    // Initialize the oxygen density and temperature\n    for (int i = 0; i < n; i++) {\n      Real altitude = i*dz;\n      // Oxygen density uses an assumed volume mixing ratio with air\n      // of 21%, molecular mass of 16 (compared to 29 for air), a\n      // surface air density of 1.2 kg m-3 and an atmospheric scale\n      // height of 8000 m\n      density_oxygen[i] = 1.2*0.21*(16.0/29.0)*exp(-altitude/8000.0);\n      t[i] = temperature[i];\n    }\n\n    // Start recording derivative information\n    s.new_recording();\n\n    // Loop through the two channels\n    for (int ichan = 0; ichan < 
2; ichan++) {\n      // Compute the emissivity profile\n      for (int i = 0; i < n; i++) {\n\temissivity[i] = 1.0-exp(-density_oxygen[i]*mass_abs_coefft[ichan]*dz);\n      }\n      // Simulate the radiance\n      r[ichan] = simulate_radiance_private(n, wavelength[ichan], \n\t\t\t\t\t   &emissivity[0], st, &t[0]);\n      // Copy the aReal variable to the Real variable\n      radiance[ichan] = r[ichan].value();\n    }\n\n    // Declare independent (x) and dependent (y) variables for\n    // Jacobian matrix\n    s.independent(st);\n    s.independent(&t[0], n);\n    s.dependent(r, 2);\n    \n    // Compute Jacobian matrix\n    std::vector<Real> jacobian((n+1)*2);\n    s.jacobian(&jacobian[0]);\n\n    // Copy elements of Jacobian matrix into the calling arrays\n    for (int ichan = 0; ichan < 2; ichan++) {\n      dradiance_dsurface_temperature[ichan] = jacobian[ichan];\n      for (int i = 0; i < n; i++) {\n\tdradiance_dtemperature[i*2+ichan] = jacobian[2+i*2+ichan];\n      }\n    }\n\n    // At the following curly bracket, the local Adept stack will be\n    // destructed\n  }\n\n  // Reactivate the Adept stack of the calling function\n  if (caller_stack != 0) {\n    caller_stack->activate();\n  }\n}\n"
  },
  {
    "path": "test/simulate_radiances.h",
    "content": "/* simulate_radiances.h - a function taking inactive arguments that returns also Jacobian matrices\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <adept.h>\n\nvoid simulate_radiances(int n, // Size of temperature array\n\t\t\t// Input variables:\n\t\t\tadept::Real surface_temperature, \n\t\t\tconst adept::Real* temperature,\n\t\t\t// Output variables:\n\t\t\tadept::Real radiance[2],\n\t\t\t// Output Jacobians:\n\t\t\tadept::Real dradiance_dsurface_temperature[2],\n\t\t\tadept::Real* dradiance_dtemperature);\n"
  },
  {
    "path": "test/state.cpp",
    "content": "/* state.cpp - An object-oriented interface to an Adept-based minimizer\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// Note that this implementation uses the GNU Scientific Library (GSL)\n// to provide the quasi-Newton minimization capability\n\n#include <iostream>\n#include <gsl/gsl_multimin.h>\n\n#include \"state.h\"\n\n// C functions needed by GSL\n\n// Return function value given a vector of state variables x\nextern \"C\" \ndouble my_function_value(const gsl_vector* x, void* params) {\n  State* state = reinterpret_cast<State*>(params);\n  return state->calc_function_value(x->data);\n}\n// Return gradient of function with respect to each state variable x\nextern \"C\"\nvoid my_function_gradient(const gsl_vector* x, void* params, gsl_vector* gradJ) { \n  State* state = reinterpret_cast<State*>(params);\n  state->calc_function_value_and_gradient(x->data, gradJ->data);\n}\n// Return both function and its gradient\nextern \"C\"\nvoid my_function_value_and_gradient(const gsl_vector* x, void* params,\n\t\t\t\t    double* J, gsl_vector* gradJ) { \n  State* state = reinterpret_cast<State*>(params);\n  *J = state->calc_function_value_and_gradient(x->data, gradJ->data);\n}\n\nusing adept::adouble;\n\n// \"State\" member function for returning the value of the function; it\n// does this by calling the underlying calc_function_value(const\n// adouble&) function, which is defined in\n// rosenbrock_banana_function.cpp.  
Since the gradient is not\n// required, the recording of automatic differentiation is \"paused\"\n// while this function is called.\ndouble State::calc_function_value(const double* x) {\n  stack_.pause_recording();\n  for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i];\n  double result = value(calc_function_value(&active_x_[0]));\n  stack_.continue_recording();\n  return result;\n}\n\n// Member function for returning both the value of the function and\n// its gradient - here Adept is used to compute the gradient\ndouble State::calc_function_value_and_gradient(const double* x, double* dJ_dx) {\n  for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i];\n  stack_.new_recording();\n  adouble J = calc_function_value(&active_x_[0]);\n  J.set_gradient(1.0);\n  stack_.compute_adjoint();\n  adept::get_gradients(&active_x_[0], nx(), dJ_dx);\n  return value(J);\n}\n\n// Minimize the function, returning true if minimization successful,\n// false otherwise\nbool State::minimize() {\n  // Minimizer settings\n  const double initial_step_size = 0.01;\n  const double line_search_tolerance = 1.0e-4;\n  const double converged_gradient_norm = 1.0e-3;\n  // Use the \"limited-memory BFGS\" quasi-Newton minimizer\n  const gsl_multimin_fdfminimizer_type* minimizer_type\n    = gsl_multimin_fdfminimizer_vector_bfgs2;\n  \n  // Declare and populate structure containing function pointers\n  gsl_multimin_function_fdf my_function;\n  my_function.n = nx();\n  my_function.f = my_function_value;\n  my_function.df = my_function_gradient;\n  my_function.fdf = my_function_value_and_gradient;\n  my_function.params = reinterpret_cast<void*>(this);\n   \n  // Set initial state variables using GSL's vector type: use -5.0 for\n  // every value\n  gsl_vector *x;\n  x = gsl_vector_alloc(nx());\n  for (unsigned int i = 0; i < nx(); ++i) gsl_vector_set(x, i, -5.0);\n\n  // Configure the minimizer, and call function once\n  gsl_multimin_fdfminimizer* minimizer\n    = 
gsl_multimin_fdfminimizer_alloc(minimizer_type, nx());\n  gsl_multimin_fdfminimizer_set(minimizer, &my_function, x,\n\t\t\t\tinitial_step_size, line_search_tolerance);\n\n  // Print out the result of the first function call with the initial\n  // state\n  std::cout << \"Initial state: x = [\";\n  for (unsigned int i = 0; i < nx(); i++) {\n    std::cout << active_x_[i].value() << \" \";\n  }\n  std::cout << \"], cost_function = \" << minimizer->f << \"\\n\";\n\n  // Begin loop\n  size_t iter = 0;\n  int status;\n  do {\n    ++iter;\n    // Perform one iteration\n    status = gsl_multimin_fdfminimizer_iterate(minimizer);\n    \n    // Quit loop if iteration failed\n    if (status != GSL_SUCCESS) break;\n    \n    // Test for convergence\n    status = gsl_multimin_test_gradient(minimizer->gradient,\n\t\t\t\t\tconverged_gradient_norm);\n     \n    // Print out limited number of state variables from this\n    // iteration, and the corresponding cost function\n    std::cout << \"Iteration \" << iter << \": x = [\";\n    for (unsigned int i = 0; i < nx(); i++) {\n      std::cout << active_x_[i].value() << \" \";\n      if (i >= 5) {\n\tstd::cout << \"...\";\n\tbreak;\n      }\n    }\n    std::cout << \"], cost_function = \" << minimizer->f << \"\\n\";\n  }\n  while (status == GSL_CONTINUE && iter < 1000);\n\n  // Free memory\n  gsl_multimin_fdfminimizer_free(minimizer);\n  gsl_vector_free(x);\n\n  // Return true if successfully minimized function, false otherwise\n  if (status == GSL_SUCCESS) {\n    std::cout << \"Minimum found after \" << iter << \" iterations\\n\";\n    return true;\n  }\n  else {\n    std::cout << \"Minimizer failed after \" << iter << \" iterations: \"\n\t      << gsl_strerror(status) << \"\\n\";\n    return false;\n  }\n}\n\n// Enquiry function to return the current value of the state\n// variables, called after minimize() has been run.\nvoid\nState::x(std::vector<double>& x_out) const\n{\n  x_out.resize(nx());\n  for (unsigned int i = 0; i < nx(); 
i++) {\n    x_out[i] = active_x_[i].value();\n  }\n}\n"
  },
  {
    "path": "test/state.h",
    "content": "/* state.h - An object-oriented interface to an Adept-based minimizer\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#ifndef STATE_H\n#define STATE_H 1\n#include <vector>\n#include \"adept.h\"\nclass State {\npublic:\n  // Construct a state with n state variables\n  State(int n) { active_x_.resize(n); }\n  // Minimize the function, returning true if minimization\n  // successful, false otherwise\n  bool minimize();\n  // Get copy of state variables after minimization\n  void x(std::vector<double>& x_out) const;\n  // For input state variables x, compute the function J(x) and\n  // return it\n  double calc_function_value(const double* x);\n  // For input state variables x, compute function and put its\n  // gradient in dJ_dx\n  double calc_function_value_and_gradient(const double* x, double* dJ_dx);\n  // Return the size of the state vector\n  unsigned int nx() const { return active_x_.size(); }\nprotected:\n  // Active version of the function: the algorithm is contained in\n  // the definition of this function (in\n  // rosenbrock_banana_function.cpp)\n  adept::adouble calc_function_value(const adept::adouble* x);\n  // DATA\n  adept::Stack stack_;                    // Adept stack object\n  std::vector<adept::adouble> active_x_;  // Active state variables\n};\n#endif\n"
  },
  {
    "path": "test/test_adept.cpp",
    "content": "/* test_adept.cpp - Demonstration of basic features of Adept\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <iostream>\n\n#include \"adept.h\"\n\n// Provide function prototype for \"algorithm\"; see algorithm.cpp for\n// the contents of the function\n#include \"algorithm.h\"\n\nint\nmain(int argc, char** argv)\n{\n  using adept::adouble;\n  using adept::Real;\n\n  // Start an Adept stack before the first adouble object is\n  // constructed\n  adept::Stack s;\n\n  adouble x[2]; // Our independent variables\n  adouble y;    // Our dependent variable\n\n  // Set the values of x\n  x[0] = 2.0;\n  x[1] = 3.0;\n\n\n  // PART 1: NUMERICAL ADJOINT\n  std::cout << \"*** Computing numerical adjoint ***\\n\\n\";\n\n  // We will provide an estimate of the adjoints by perturbing the\n  // inputs by a small amount\n\n  adouble x_perturbed[2]; // Perturbed independent variables\n\n  // This version of the code uses the same algorithm function that\n  // takes adouble arguments for doing the numerical adjoint, even\n  // though we are not doing automatic differentiation. To make it\n  // faster, we can turn off the recording of derivative information\n  // using the pause_recording function.  This only works if all code\n  // has been compiled with the -DADEPT_RECORDING_PAUSABLE flag;\n  // otherwise it does nothing (so the program will still run\n  // correctly, but will be less efficient). 
Note that another\n  // approach if you want to call a function several times, sometimes\n  // with automatic differentiation and sometimes without, is\n  // demonstrated in\n  // test_adept_with_and_without_ad.cpp.\n  s.pause_recording();\n\n  // We will compare the Adept result to a numerically computed\n  // adjoint, so define the perturbation size\n  double dx = 1.0e-5;\n\n  // Run the algorithm\n  y = algorithm(x);\n\n  // Now perturb x[0] and x[1] in turn and get a numerical estimate of\n  // the gradient\n  x_perturbed[0] = x[0]+dx;\n  x_perturbed[1] = x[1];\n  double dy_dx0 = adept::value((algorithm(x_perturbed)-y)/dx);\n  x_perturbed[0] = x[0];\n  x_perturbed[1] = x[1]+dx;\n  double dy_dx1 = adept::value((algorithm(x_perturbed)-y)/dx);\n\n  // Turn the recording of derivative information back on\n  s.continue_recording();\n\n  // Print information about the data held in the stack\n  std::cout << \"Stack status after numerical adjoint (if recording was successfully\\n\"\n\t    << \"paused then the number of operations should be zero):\\n\" \n\t    << s;\n  // Print memory information\n  std::cout << \"Memory usage: \" << s.memory() << \" bytes\\n\\n\";\n\n  // PART 2: REVERSE-MODE AUTOMATIC DIFFERENTIATION\n\n  // Now we use Adept to do the automatic differentiation\n  std::cout << \"*** Computing adjoint using automatic differentiation ***\\n\\n\";\n\n  // Start a new recording of derivative statements; note that this\n  // must be done after the independent variables x[0] and x[1] are\n  // defined and after they have been given their initial values\n  s.new_recording();\n\n  // Run the algorithm again\n  y = algorithm(x);\n\n  // Print information about the data held in the stack\n  std::cout << \"Stack status after algorithm run but adjoint not yet computed:\\n\"\n\t    << s;\n  // Print memory information\n  std::cout << \"Memory usage: \" << s.memory() << \" bytes\\n\\n\";\n\n  // If we set the adjoint of the dependent variable to 
1 then the\n  // resulting adjoints of the independent variables after\n  // reverse-mode automatic differentiation will be comparable to the\n  // outputs of the numerical differentiation\n  y.set_gradient(1.0);\n\n  // Print out some diagnostic information\n  std::cout << \"List of derivative statements:\\n\";\n  s.print_statements();\n  std::cout << \"\\n\";\n\n  std::cout << \"Initial list of gradients:\\n\";\n  s.print_gradients();\n  std::cout << \"\\n\";\n\n  // Run the adjoint algorithm (reverse-mode differentiation)\n  s.reverse();\n\n  // Some more diagnostic information\n  std::cout << \"Final list of gradients:\\n\";\n  s.print_gradients();\n  std::cout << \"\\n\";\n  \n  // Extract the adjoints of the independent variables\n  double x0_ad = 0, x1_ad = 0; \n  x[0].get_gradient(x0_ad);\n  x[1].get_gradient(x1_ad);\n\n\n  // PART 3: JACOBIAN COMPUTATION\n\n  // Here we use the same recording to compute the Jacobian matrix\n  std::cout << \"*** Computing Jacobian matrix ***\\n\\n\";\n\n  s.independent(x, 2); // Declare independents\n  s.dependent(y);      // Declare dependents\n  Real jac[2];         // Where the Jacobian will be stored\n  s.jacobian(jac);     // Compute Jacobian\n\n\n  // PART 4: PRINT OUT RESULTS\n\n  // Print information about the data held in the stack\n  std::cout << \"Stack status after adjoint and Jacobian computed:\\n\"\n\t    << s;\n  // Print memory information\n  std::cout << \"Memory usage: \" << s.memory() << \" bytes\\n\\n\";\n\n  std::cout << \"Result of forward algorithm:\\n\";\n  std::cout << \"  y = \" << y.value() << \"\\n\";\n  \n  std::cout << \"Comparison of gradients:\\n\";\n  std::cout << \"  dy_dx0[numerical] = \" << dy_dx0 << \"\\n\";\n  std::cout << \"  dy_dx0[adjoint]   = \" << x0_ad  << \"\\n\";\n  std::cout << \"  dy_dx0[jacobian]  = \" << jac[0] << \"\\n\";\n  std::cout << \"  dy_dx1[numerical] = \" << dy_dx1 << \"\\n\";\n  std::cout << \"  dy_dx1[adjoint]   = \" << x1_ad  << \"\\n\";\n  std::cout << \"  
dy_dx1[jacobian]  = \" << jac[1] << \"\\n\";\n\n  std::cout << \"\\nNote that the numerical gradients are less accurate since they use\\n\"\n\t    << \"a finite difference and are also succeptible to round-off error.\\n\";\n\n  return 0;\n\n}\n"
  },
  {
    "path": "test/test_adept_with_and_without_ad.cpp",
    "content": "/* test_adept_with_and_without_ad.cpp\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// Demonstration of the use of Adept with code (in this case,\n// algorithm.cpp) that has been compiled twice, once with automatic\n// differentiation enabled (the default) and once with it disabled\n// (using -DADEPT_NO_AUTOMATIC_DIFFERENTIATION) to provide a faster\n// version of a function that works with double rather than adouble\n// objects.\n\n#include <iostream>\n\n#include \"adept.h\"\n\n// Provide function prototypes for \"algorithm\"; see algorithm.cpp for\n// the contents of the function\n#include \"algorithm_with_and_without_ad.h\"\n\n// Simple demonstration of automatic differentiation using Adept\nint\nmain(int argc, char** argv)\n{\n  using adept::adouble;\n  using adept::Real;\n\n  // Start an Adept stack before the first adouble object is\n  // constructed\n  adept::Stack s;\n\n  adouble x[2]; // Our independent variables\n  adouble y;    // Our dependent variable\n\n  // Set the values of x\n  x[0] = 2.0;\n  x[1] = 3.0;\n\n\n  // PART 1: NUMERICAL ADJOINT\n  std::cout << \"*** Computing numerical adjoint ***\\n\\n\";\n\n  // We will compare the Adept result to a numerically computed\n  // adjoint, so define the perturbation size\n  double dx = 1.0e-5;\n\n  // Initialize an inactive version of x as double rather than adouble\n  // variables\n  double x_r[2];\n  x_r[0] = x[0].value();\n  x_r[1] = x[1].value();\n\n  // Run the original version of the algorithm that takes real\n  // arguments; this was compiled from algorithm.cpp using the\n  // -DADEPT_NO_AUTOMATIC_DIFFERENTIATION flag to produce the\n  // algorithm_noad.o object file\n  double y_real = algorithm(x_r);\n\n  // Now perturb x[0] and x[1] 
in turn and get a numerical estimate of\n  // the gradient\n  x_r[0] = x[0].value()+dx;\n  x_r[1] = x[1].value();\n  double dy_dx0 = (algorithm(x_r)-y_real)/dx;\n  x_r[0] = x[0].value();\n  x_r[1] = x[1].value()+dx;\n  double dy_dx1 = (algorithm(x_r)-y_real)/dx;\n\n  // Print information about the data held in the stack\n  std::cout << \"Stack status after numerical adjoint (number of operations should be zero):\\n\" \n\t    << s << \"\\n\";\n\n\n  // PART 2: REVERSE-MODE AUTOMATIC DIFFERENTIATION\n  std::cout << \"*** Computing adjoint using automatic differentiation ***\\n\\n\";\n\n  // Start a new recording of derivative statements (note that this\n  // must be done after the independent variables x[0] and x[1] are\n  // initialized\n  s.new_recording();\n\n  // Now use Adept to do it - first run the algorithm overloaded for\n  // adouble arguments\n  y = algorithm(x);\n\n  // Print information about the data held in the stack\n  std::cout << \"Stack status after algorithm run but adjoint not yet computed:\\n\"\n\t    << s << \"\\n\";\n\n  // If we set the adjoint of the dependent variable to 1 then the\n  // resulting adjoints of the independent variables after\n  // reverse-mode automatic differentiation will be comparable to the\n  // outputs of the numerical differentiation\n  y.set_gradient(1.0);\n\n  // Print out some diagnostic information\n  std::cout << \"List of derivative statements:\\n\";\n  s.print_statements();\n  std::cout << \"\\n\";\n\n  std::cout << \"Initial list of gradients:\\n\";\n  s.print_gradients();\n  std::cout << \"\\n\";\n\n  // Run the adjoint algorithm (reverse-mode differentiation)\n  s.reverse();\n\n  std::cout << \"Final list of gradients:\\n\";\n  s.print_gradients();\n  std::cout << \"\\n\";\n  \n  // Extract the adjoints of the independent variables\n  double x0_ad = 0, x1_ad = 0; \n  x[0].get_gradient(x0_ad);\n  x[1].get_gradient(x1_ad);\n\n\n  // PART 3: JACOBIAN COMPUTATION\n\n  // Here we use the same recording to compute 
the Jacobian matrix\n  std::cout << \"*** Computing Jacobian matrix ***\\n\\n\";\n\n  s.independent(x, 2); // Declare independents\n  s.dependent(y);      // Declare dependents\n  Real jac[2];         // Jacobian data must be of type \"Real\"\n  s.jacobian(jac);     // Compute Jacobian\n\n\n  // PART 4: PRINT OUT RESULT\n\n  // Print information about the data held in the stack\n  std::cout << \"Stack status after adjoint and Jacobian computed:\\n\"\n\t    << s << \"\\n\";\n\n  // Print memory information\n  std::cout << \"Memory usage: \" << s.memory() << \" bytes\\n\\n\";\n\n  std::cout << \"Result of forward algorithm:\\n\";\n  std::cout << \"  y[from algorithm taking double arguments]  = \" << y_real << \"\\n\";\n  std::cout << \"  y[from algorithm taking adouble arguments] = \" << y.value() << \"\\n\\n\";\n  \n  std::cout << \"Comparison of gradients:\\n\";\n  std::cout << \"  dy_dx0[numerical] = \" << dy_dx0 << \"\\n\";\n  std::cout << \"  dy_dx0[adjoint]   = \" << x0_ad  << \"\\n\";\n  std::cout << \"  dy_dx0[jacobian]  = \" << jac[0] << \"\\n\";\n  std::cout << \"  dy_dx1[numerical] = \" << dy_dx1 << \"\\n\";\n  std::cout << \"  dy_dx1[adjoint]   = \" << x1_ad  << \"\\n\";\n  std::cout << \"  dy_dx1[jacobian]  = \" << jac[1] << \"\\n\";\n\n  std::cout << \"\\nNote that the numerical gradients are less accurate since they use\\n\"\n\t    << \"a finite difference and are also succeptible to round-off error.\\n\";\n\n  return 0;\n\n}\n"
  },
  {
    "path": "test/test_array_derivatives.cpp",
    "content": "/* test_array_derivatives.cpp - Test derivatives of array expressions\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n*/\n\n#include <adept_arrays.h>\n\n\n// Arbitrary algorithm converting array of general type A to scalar of\n// type S, which may be active or passive\ntemplate <class A, class S>\nvoid algorithm(const A& x, S& y) {\n  using namespace adept;\n  A tmp;\n  intVector index(2);\n  index << 1, 0;\n  tmp = atan2((exp(x) * x), spread<0>(x(index,1),2)) / x(0,0);\n  y = sum(tmp);\n}\n\n\nint\nmain(int argc, const char** argv) {\n  using namespace adept;\n\n  Stack stack;\n\n  // Matrix dimension\n  static const int N = 2;\n  static const Real MAX_FRAC_ERR = 1.0e-5;\n\n  // Perturbation size for numerical calculation\n  Real dx = 1.0e-6;\n\n  if (sizeof(Real) < 8) {\n    // Single precision only works with larger perturbations\n    dx = 1.0e-4;\n  }\n\n  // Maximum fractional error\n  Real max_frac_err;\n  bool error_too_large = false;\n\n  // Input data\n  Matrix X(N,N);\n  X << 2, 3, 5, 7;\n  \n  // Numerical calculation \n  std::cout << \"NUMERICAL CALCULATION\\n\";\n  Matrix dJ_dx_num(N,N);\n  {\n    Real J;\n    algorithm(X, J);\n    std::cout << \"J = \" << J << \"\\n\";\n\n    for (int i = 0; i < N; ++i) {\n      for (int j = 0; j < N; ++j) {\n\tMatrix Xpert(N,N);\n\tXpert = X;\n\tXpert(i,j) += dx;\n\tReal Jpert;\n\talgorithm(Xpert, Jpert);\n\tdJ_dx_num(i,j) = (Jpert - J) / dx;\n      }\n    }\n  }\n\n  std::cout << \"dJ_dx_num = \" << dJ_dx_num << \"\\n\";\n\n  std::cout << \"\\nNUMERICAL CALCULATION WITH \\\"FixedArray\\\"\\n\";\n  Matrix22 dJ_dx_num_FixedArray;\n  {\n    Real J;\n    algorithm(X, J);\n    std::cout << 
\"J = \" << J << \"\\n\";\n\n    for (int i = 0; i < N; ++i) {\n      for (int j = 0; j < N; ++j) {\n\tMatrix22 Xpert = X;\n\tXpert(i,j) += dx;\n\tReal Jpert;\n\talgorithm(Xpert, Jpert);\n\tdJ_dx_num_FixedArray(i,j) = (Jpert - J) / dx;\n      }\n    }\n  }\n\n  std::cout << \"dJ_dx_num_FixedArray = \" << dJ_dx_num_FixedArray << \"\\n\";\n\n  // Adept calculation with aArray\n  std::cout << \"\\nADEPT CALCULATION WITH \\\"aArray\\\"\\n\";\n  Matrix dJ_dx_adept_Array(N,N);\n  {\n    aMatrix aX = X;\n    stack.new_recording();\n    aReal aJ;\n    algorithm(aX, aJ);\n    std::cout << \"J = \" << aJ << \"\\n\";\n    aJ.set_gradient(1.0);\n    stack.reverse();\n   \n    dJ_dx_adept_Array = aX.get_gradient();\n  }\n\n  std::cout << \"dJ_dx_adept_Array = \" << dJ_dx_adept_Array << \"\\n\";\n\n  max_frac_err = maxval(abs(dJ_dx_adept_Array-dJ_dx_num)/dJ_dx_num);\n  if (max_frac_err <= MAX_FRAC_ERR) {\n    std::cout << \"max fractional error = \" << max_frac_err\n\t\t<< \": PASSED\\n\";\n  }\n  else {\n    std::cout << \"max fractional error = \"\n\t      << max_frac_err << \": FAILED\\n\";\n    error_too_large = true;\n  }\n  // Adept calculation with aFixedArray\n  std::cout << \"\\nADEPT CALCULATION WITH \\\"aFixedArray\\\"\\n\";\n  Matrix dJ_dx_adept_FixedArray;\n  {\n    aMatrix22 aX = X;\n    stack.new_recording();\n    aReal aJ;\n    algorithm(aX, aJ);\n    std::cout << \"J = \" << aJ << \"\\n\";\n    aJ.set_gradient(1.0);\n    stack.reverse();\n    dJ_dx_adept_FixedArray = aX.get_gradient();\n\n  }\n  std::cout << \"dJ_dx_adept_FixedArray = \" << dJ_dx_adept_FixedArray << \"\\n\";\n\n  max_frac_err = maxval(abs(dJ_dx_adept_FixedArray-dJ_dx_num)/dJ_dx_num);\n  if (max_frac_err <= MAX_FRAC_ERR) {\n    std::cout << \"max fractional error = \" << max_frac_err\n\t\t<< \": PASSED\\n\";\n  }\n  else {\n    std::cout << \"max fractional error = \"\n\t      << max_frac_err << \": FAILED\\n\";\n    error_too_large = true;\n  }\n  \n\n  // Adept forward calculation with aArray: 
four (NxN) separate\n  // calculations are required to compute the derivative with respect\n  // to the four inputs.\n  std::cout << \"\\nADEPT FORWARD CALCULATION WITH \\\"aArray\\\"\\n\";\n  Matrix dJ_dx_adept_forward_Array(N,N);\n  {\n    aMatrix aX = X;\n    stack.new_recording();\n    aReal aJ;\n    algorithm(aX, aJ);\n    std::cout << \"J = \" << aJ << \"\\n\";\n\n    Matrix X_tl(N,N);\n\n    X_tl=0.0;\n    X_tl(0,0) = 1.0;\n    aX.set_gradient(X_tl);\n    stack.forward();\n    dJ_dx_adept_forward_Array(0,0) = aJ.get_gradient();\n\n    stack.clear_gradients();\n    X_tl=0.0;\n    X_tl(0,1) = 1.0;\n    aX.set_gradient(X_tl);\n    stack.forward();\n    dJ_dx_adept_forward_Array(0,1) = aJ.get_gradient();\n    \n    stack.clear_gradients();\n    X_tl=0.0;\n    X_tl(1,0) = 1.0;\n    aX.set_gradient(X_tl);\n    stack.forward();\n    dJ_dx_adept_forward_Array(1,0) = aJ.get_gradient();\n    \n    stack.clear_gradients();\n    X_tl=0.0;\n    X_tl(1,1) = 1.0;\n    aX.set_gradient(X_tl);\n    stack.forward();\n    dJ_dx_adept_forward_Array(1,1) = aJ.get_gradient();\n    \n  }\n\n  std::cout << \"dJ_dx_adept_forward_Array = \" << dJ_dx_adept_forward_Array << \"\\n\";\n\n  max_frac_err = maxval(abs(dJ_dx_adept_forward_Array-dJ_dx_num)/dJ_dx_num);\n  if (max_frac_err <= MAX_FRAC_ERR) {\n    std::cout << \"max fractional error = \" << max_frac_err\n\t\t<< \": PASSED\\n\";\n  }\n  else {\n    std::cout << \"max fractional error = \"\n\t      << max_frac_err << \": FAILED\\n\";\n    error_too_large = true;\n  }\n\n  \n  std::cout << \"\\n\";\n\n  if (error_too_large) {\n    std::cerr << \"*** Error: fractional error in the derivatives of some configurations too large\\n\";\n\n    if (sizeof(Real) < 8) {\n      std::cerr << \"*** (but you are using less than double precision so it is not surprising)\\n\";\n    }\n\n    return 1;\n  }\n  else {\n    return 0;\n  }\n\n\n}\n"
  },
  {
    "path": "test/test_array_speed.cpp",
    "content": "#include <iostream>\n#define ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n#define ADEPT_REAL_TYPE_SIZE 4\n#include <adept_arrays.h>\n#include \"Timer.h\"\n\n#define ASSIGN   =\n#define WARMUP_OPERATOR + exp\n#define OPERATOR + fastexp\n//#define SUFFIX_OP + 0.5\n#define SUFFIX_OP\n\nusing namespace adept;\n\nint main()\n{\n  Timer timer;\n  timer.print_on_exit();\n  int n = 128;\n\n  static const int rep = 10000;\n  //  static const int rep = 10;\n\n  std::cout << \"Packet<Real>::size = \" << internal::Packet<Real>::size << \"\\n\";\n\n  Stack stack;\n\n  aMatrix M(n,n), P(n,n), Q(n,n);\n  //  Array<2,aReal,false> M(n,n), P(n,n), Q(n,n);\n  aReal Mc[n][n], Pc[n][n], Qc[n][n];\n\n  for (int i = 0; i < n; ++i) {\n    for (int j = 0; j < n; ++j) {\n      P(i,j) = Pc[i][j] = 0.01 * (i-j);\n      Q(i,j) = Qc[i][j] = 0.1 * (j+1);\n      M(i,j) = Mc[i][j] = 0.0;\n    }\n  }\n\n  int t_c_style_w = timer.new_activity(\"C-style for loops (warm-up)\");\n  int t_c_style = timer.new_activity(\"C-style for loops\");\n  int t_adept_w = timer.new_activity(\"Adept (warm-up)\");\n  int t_adept = timer.new_activity(\"Adept\");\n  int t_adept_container_w = timer.new_activity(\"Adept container only (warm-up)\");\n  int t_adept_container = timer.new_activity(\"Adept container only\");\n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n  int t_jacobian_w = timer.new_activity(\"Jacobian (warm-up)\");\n  int t_jacobian = timer.new_activity(\"Jacobian\");\n  int t_jacobian_array_w = timer.new_activity(\"Jacobian array-op (warm-up)\");\n  int t_jacobian_array = timer.new_activity(\"Jacobian array-op\");\n#endif\n\n  stack.new_recording();\n  timer.start(t_c_style_w);\n  for (int irep = 0; irep < rep; ++irep) {\n    for (int i = 0; i < n; ++i) {\n      for (int j = 0; j < n; ++j) {\n\tMc[i][j] ASSIGN Pc[i][j] WARMUP_OPERATOR (Qc[i][j] SUFFIX_OP);\n      }\n    }\n  }\n  timer.stop();\n\n  if (n <= 10) {\n    std::cout << \"C-style M = \\n\";\n    for (int i = 0; i < n; ++i) {\n      for (int 
j = 0; j < n; ++j) {\n\tstd::cout << \" \" << Mc[i][j];\n      }\n      std::cout << \"\\n\";\n    }\n  }\n  \n  //  std::cout << stack;\n\n  stack.new_recording();\n  timer.start(t_c_style);\n  for (int irep = 0; irep < rep; ++irep) {\n    for (int i = 0; i < n; ++i) {\n      for (int j = 0; j < n; ++j) {\n\tMc[i][j] ASSIGN Pc[i][j] OPERATOR (Qc[i][j] SUFFIX_OP);\n      }\n    }\n  }\n  timer.stop();\n  //  std::cout << stack;\n\n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n  stack.independent(&Pc[0][0], n*n);\n  stack.dependent(&Mc[0][0], n*n);\n\n  timer.start(t_jacobian_w);\n  Real* jac;\n  jac = new Real[n*n*n*n];\n\n  stack.jacobian_forward(jac);\n  timer.stop();\n  timer.start(t_jacobian);\n  stack.jacobian_forward(jac);\n  timer.stop();\n#endif\n\n\n  //  std::cout << Mc[0][0] << \" \" << Mc[10][10] << \"\\n\";\n\n  stack.new_recording();\n  timer.start(t_adept_w);\n  for (int irep = 0; irep < rep; ++irep) {\n    //    M ASSIGN noalias(P WARMUP_OPERATOR (Q SUFFIX_OP));\n    M ASSIGN P WARMUP_OPERATOR (Q SUFFIX_OP);\n  }\n  timer.stop();\n  //  std::cout << stack;\n\n  if (n <= 10) {\n    std::cout << \"Array-style M = \\n\";\n    for (int i = 0; i < n; ++i) {\n      for (int j = 0; j < n; ++j) {\n\tstd::cout << \" \" << M(i,j);\n      }\n      std::cout << \"\\n\";\n    }\n  }\n\n  std::cout << \"Alignment offset = \" << (P OPERATOR (Q SUFFIX_OP)).alignment_offset() << \"\\n\";\n\n\n  stack.new_recording();\n  timer.start(t_adept);\n  for (int irep = 0; irep < rep; ++irep) {\n    //    M += noalias(P OPERATOR (Q SUFFIX_OP));\n    M ASSIGN P OPERATOR (Q SUFFIX_OP);\n  }\n  timer.stop();\n  //  std::cout << stack;\n\n\n#ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION\n\n  stack.clear_independents();\n  stack.clear_dependents();\n  stack.independent(P);\n  stack.dependent(Q);\n  //  stack.independent(P.data(), n*n);\n  //  stack.dependent(M.data(), n*n);\n\n  std::cout << stack;\n\n  timer.start(t_jacobian_array_w);\n  stack.jacobian_forward(jac);\n  timer.stop();\n  
timer.start(t_jacobian_array);\n  stack.jacobian_forward(jac);\n  timer.stop();\n#endif\n\n  stack.new_recording();\n  timer.start(t_adept_container_w);\n  for (int irep = 0; irep < rep; ++irep) {\n    for (int i = 0; i < n; ++i) {\n      for (int j = 0; j < n; ++j) {\n\tM(i,j) ASSIGN P(i,j) WARMUP_OPERATOR (Q(i,j) SUFFIX_OP);\n      }\n    }\n  }\n  timer.stop();\n  //  std::cout << stack;\n  //  std::cout << M;\n\n  stack.new_recording();\n  timer.start(t_adept_container);\n  for (int irep = 0; irep < rep; ++irep) {\n    for (int i = 0; i < n; ++i) {\n      for (int j = 0; j < n; ++j) {\n\tM(i,j) ASSIGN P(i,j) OPERATOR (Q(i,j) SUFFIX_OP);\n      }\n    }\n  }\n  timer.stop();\n  //  std::cout << stack;\n}\n"
  },
  {
    "path": "test/test_arrays.cpp",
    "content": "/* test_arrays.cpp - Test Adept's array functionality\n\n    Copyright (C) 2016-2018 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n  This program can be compiled to run in three ways: (1) normal\n  compilation tests inactive arrays, (2) with -DALL_ACTIVE tests\n  active arrays, and (3) \"-DALL_ACTIVE -DADEPT_RECORDING_PAUSABLE\"\n  tests whether a \"paused\" recording correctly records nothing to the\n  automatic-differentiation stack.\n\n*/\n\n#include <iostream>\n#include <complex>\n\n#define ADEPT_BOUNDS_CHECKING 1\n\n#include <adept_arrays.h>\n\n//#define TRAP_FLOATING_POINT_EXCEPTIONS 1\n#ifdef TRAP_FLOATING_POINT_EXCEPTIONS\n#include <fenv.h>\n#endif\n\n\n// The following controls whether to use active variables or not\n//#define ALL_ACTIVE 1\n//#define MARVEL_STYLE 1\n//#define ALL_COMPLEX 1\n\nusing namespace adept;\n\n\nint\nmain(int argc, const char** argv) {\n  using namespace adept;\n\n#ifdef TRAP_FLOATING_POINT_EXCEPTIONS\n  feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);\n#endif\n\n#ifdef ALL_ACTIVE\n#define IsActive true\n  Stack stack;\n#else\n#define IsActive false\n#endif\n  \n#define HEADING(MESSAGE)\t\t\t\t\t\t\\\n  std::cout << \"====================================================================\\n\" \\\n\t    << \"   TESTING \" << MESSAGE << \"\\n\"\n\n#define COMMA ,\n\n\n#define SIMPLE_EVAL(MESSAGE, TYPE, X, INIT, EXPR)\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\\\n    TYPE X;\t\t\t\t\t\t\t\t\\\n    if (INIT) {\t\t\t\t\t\t\t\t\\\n      X = test. 
X;\t\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n#define EVAL(MESSAGE, TYPE, X, INIT, EXPR)\t\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\\\n    TYPE X;\t\t\t\t\t\t\t\t\\\n    if (INIT) {\t\t\t\t\t\t\t\t\\\n      X = test. 
X;\t\t\t\t\t\t\t\\\n      std::cout << #TYPE << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << #TYPE << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n  \n#ifdef ALL_ACTIVE\n#define EVAL2(MESSAGE, TYPEX, X, INITX, TYPEY, Y, EXPR)\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    if (INITX) {\t\t\t\t\t\t\t\\\n      X = test. X;\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    TYPEY Y; Y = test. 
Y;\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    int nop=stack.n_operations();\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    std::cout << \"Differential operations: \" << stack.n_operations()-nop << \"\\n\";\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t        \\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n#else\n#define EVAL2(MESSAGE, TYPEX, X, INITX, TYPEY, Y, EXPR)\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    if (INITX) {\t\t\t\t\t\t\t\\\n      X = test. X;\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    TYPEY Y; Y = test. 
Y;\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t        \\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n#endif\n\n#define EVAL3(MESSAGE, TYPEX, X, INITX, TYPEY, Y, TYPEZ, Z, EXPR)\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\"; \\\n  try {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    if (INITX) {\t\t\t\t\t\t\t\\\n      X = test. X;\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    TYPEY Y; Y.link( test. Y );\t\t\t\t\t\t\\\n    TYPEZ Z; Z.link( test. 
Z );\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << #TYPEZ << \" \" << #Z << \" = \" << Z << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t        \\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n#define EVAL_NO_TRAP(MESSAGE, TYPE, X, INIT, EXPR)\t\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    TYPE X;\t\t\t\t\t\t\t\t\\\n    if (INIT) {\t\t\t\t\t\t\t\t\\\n      X = test. 
X;\t\t\t\t\t\t\t\\\n      std::cout << #TYPE << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << #TYPE << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }  \n\n#define EVAL2_NO_TRAP(MESSAGE, TYPEX, X, INITX, TYPEY, Y, EXPR)\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n###  \" << #EXPR << \"\\n\";\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    if (INITX) {\t\t\t\t\t\t\t\t\\\n      X = test. X;\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    TYPEY Y; Y = test. 
Y;\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n#ifndef ALL_COMPLEX\n\n#ifdef ALL_ACTIVE\n#ifndef MARVEL_STYLE\n  typedef aReal myReal;\n  typedef aMatrix myMatrix;\n  typedef aVector myVector;\n  typedef aSymmMatrix mySymmMatrix;\n  //typedef aSquareMatrix mySymmMatrix;\n  typedef aDiagMatrix myDiagMatrix;\n  typedef aTridiagMatrix myTridiagMatrix;\n  typedef aLowerMatrix myLowerMatrix;\n  typedef aUpperMatrix myUpperMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,2,1>,true> myOddBandMatrix;\n  typedef aArray3D myArray3D;\n#else\n  typedef aReal myReal;\n  typedef Array<2,aReal,false> myMatrix;\n  typedef Array<1,aReal,false> myVector;\n  typedef SpecialMatrix<aReal,internal::SquareEngine<ROW_MAJOR>,false> mySymmMatrix;\n  typedef SpecialMatrix<aReal,internal::BandEngine<ROW_MAJOR,0,0>,false> myDiagMatrix;\n  typedef SpecialMatrix<aReal,internal::BandEngine<ROW_MAJOR,1,1>,false> myTridiagMatrix;\n  typedef SpecialMatrix<aReal,internal::LowerEngine<ROW_MAJOR>, false> myLowerMatrix;\n  typedef SpecialMatrix<aReal,internal::UpperEngine<ROW_MAJOR>, false> myUpperMatrix;\n  typedef SpecialMatrix<aReal,internal::BandEngine<ROW_MAJOR,2,1>,false> myOddBandMatrix;\n\n#endif\n#else\n\n  typedef Real   myReal;\n  typedef Matrix myMatrix;\n  typedef Vector myVector;\n  typedef Array3D myArray3D;\n\n  typedef SymmMatrix mySymmMatrix;\n  //typedef SquareMatrix mySymmMatrix;\n  typedef DiagMatrix myDiagMatrix;\n  typedef TridiagMatrix myTridiagMatrix;\n  typedef LowerMatrix myLowerMatrix;\n  typedef UpperMatrix myUpperMatrix;\n  typedef 
SpecialMatrix<Real,internal::BandEngine<ROW_MAJOR,2,1>,false> myOddBandMatrix;\n\n  /*    \n  typedef SpecialMatrix<Real,SymmEngine<ROW_UPPER_COL_LOWER>,false> mySymmMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<COL_MAJOR,0,0>,false> myDiagMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<COL_MAJOR,1,1>,false> myTridiagMatrix;\n  typedef SpecialMatrix<Real,internal::BandEngine<COL_MAJOR,2,1>,false> myOddBandMatrix;\n  */\n\n#endif\n\n\n#else\n  typedef std::complex<Real> myReal;\n  typedef Array<1,std::complex<Real>,IsActive> myVector;\n  typedef Array<2,std::complex<Real>,IsActive> myMatrix;\n  typedef Array<3,std::complex<Real>,IsActive> myArray3D;\n  typedef SpecialMatrix<std::complex<Real>,internal::SquareEngine<ROW_MAJOR>,IsActive> mySymmMatrix;\n  typedef SpecialMatrix<std::complex<Real>,internal::BandEngine<ROW_MAJOR,0,0>,IsActive> myDiagMatrix;\n  typedef SpecialMatrix<std::complex<Real>,internal::BandEngine<ROW_MAJOR,1,1>,IsActive> myTridiagMatrix;\n  typedef SpecialMatrix<std::complex<Real>,internal::LowerEngine<ROW_MAJOR>, IsActive> myLowerMatrix;\n  typedef SpecialMatrix<std::complex<Real>,internal::UpperEngine<ROW_MAJOR>, IsActive> myUpperMatrix;\n  typedef SpecialMatrix<std::complex<Real>,internal::BandEngine<ROW_MAJOR,2,1>,IsActive> myOddBandMatrix;\n\n#endif\n\n  struct Test {\n\n    bool b;\n    boolVector B;\n    int c;\n    myReal x;\n    myVector v, w, vlong;\n    myMatrix M, N;\n    myMatrix Mstrided;\n    myMatrix S;\n    mySymmMatrix O, P;\n    myDiagMatrix D, E;\n    myTridiagMatrix T, TT;\n    myLowerMatrix L, LL;\n    myUpperMatrix U, UU;\n    myOddBandMatrix Q, R;\n    intVector index;\n    myArray3D A;\n\n#define MINI_TEST\n#ifdef MINI_TEST\n#define DIM1 3\n#define DIM2 2\n#define DIM3 5\n#define DIMLONG 12\n#else\n#define DIM1 12\n#define DIM2 10\n#define DIM3 15\n#define DIMLONG 20\n#endif\n    Test() {\n#ifdef ALL_COMPLEX\n#define I std::complex<Real>(0.0,1.0)\n#else\n#define I 0.0\n#endif\n      b = false;\n     
 B.resize(DIM1); B = false;\n      c = 0;\n      x = -2;\n      v.resize(DIM1);\n      vlong.resize(DIMLONG); vlong = linspace(1,DIMLONG,DIMLONG);\n      w.resize(DIM1);\n      M.resize(DIM2,DIM1);\n      myMatrix Mtmp(DIM2*3,DIM1*2);\n      Mstrided.link(Mtmp(stride(0,end,3),stride(0,end,2)));\n      N.resize(DIM2,DIM1);\n      S.resize(DIM1,DIM1);\n      O.resize(DIM1);\n      Q.resize(DIM3);\n      index.resize(DIM2);\n      v(0) = 2.0 + 3.0*I; v(1) = 3; v(2) = 5;\n      w(0) = 7.0 + 4.0*I; w(1) = 11; w(2) = 13;\n      M(0,0) = 2.0 + 3.0*I; M(0,1) = 3; M(0,2) = 5;\n      M(1,0) = 7; M(1,1) = 11; M(1,2) = 13;\n      Mstrided = M;\n      N(0,0) = 17.0+5.0*I; N(0,1) = 19; N(0,2) = 23;\n      N(1,0) = 29; N(1,1) = 31; N(1,2) = 37;\n      S(0,0) = 2.0+3.0*I; S(0,1) = 3; S(0,2) = 5;\n      S(1,0) = 7.0+4.0*I; S(1,1) = 11; S(1,2) = 13;\n      S(2,0) = 17; S(2,1) = 19; S(2,2) = 23;\n\n      O(0,0) = 7.0+3.0*I;\n      O(1,0) = 2; O(1,1) = 11;\n      O(2,0) = 3; O(2,1) = 5; O(2,2) = 13;\n\n      P = 14.0 - O;\n\n      Q.diag_vector(-2) = 1;\n      Q.diag_vector(-1) = 2;\n      Q.diag_vector(0)  = 3;\n      Q.diag_vector(1)  = 4;\n\n      D = S;\n      T = S;\n      L = S;\n      U = S;\n\n      A.resize(DIM2,DIM1,DIM2);\n      A << 2.0+3.0*I, 3, 5, 7, 11, 13,\n\t17, 19, 23, 29, 31,37;\n\n      index << 1, 0;\n    }\n  };\n\n#ifdef ALL_ACTIVE\n#ifndef ADEPT_RECORDING_PAUSABLE\n  stack.new_recording();\n#else\n  stack.pause_recording();\n#endif\n#endif\n\n  Test test;\n\n  bool should_fail=false;\n  int anomalous_results=0;\n\n  std::cout << adept::configuration();\n\n#ifdef ALL_ACTIVE\n  std::cout << \"Testing ACTIVE arrays\\n\";\n#else\n  std::cout << \"Testing INACTIVE arrays\\n\";\n#endif\n#ifdef ALL_COMPLEX\n  std::cout << \"Testing COMPLEX arrays\\n\";\n#endif\n\n\n  HEADING(\"ARRAY FUNCTIONALITY\");\n  EVAL(\"Array \\\"resize\\\" member function\", myMatrix, M, true, M.resize(1,5));\n  \n  should_fail=true;\n  EVAL(\"Array \\\"resize\\\" with invalid dimensions\", 
myMatrix, M, true, M.resize(1));\n  should_fail=false;\n  EVAL(\"Array \\\"resize\\\" with \\\"dimensions\\\" function\", myMatrix, M, true, M.resize(dimensions(4,2)));\n\n  EVAL(\"Array \\\"clear\\\" member function\", myMatrix, M, true, M.clear());\n\n#ifdef ADEPT_CXX11_FEATURES\n  HEADING(\"INITIALIZER LISTS (C++11 ONLY)\");\n  EVAL(\"Vector assignment to initializer list from empty\", myVector, v,\n       false, v = {1 COMMA 2});\n  EVAL(\"Vector assignment to initializer list with underfill\", myVector, v,\n       true, v = {1.0 COMMA 2.0});\n  should_fail = true;\n  EVAL(\"Vector assignment to initializer list with overfill (SHOULD FAIL)\", myVector, v,\n    true, v = {1.0 COMMA 2.0 COMMA 3.0 COMMA 4.0});\n  should_fail = false;\n  EVAL(\"Matrix assignment to initializer list from empty\", myMatrix, M,\n    false, M = { {1 COMMA 2} COMMA {3 COMMA 4} });\n  EVAL(\"Matrix assignment to initializer list with underfill\", myMatrix, M,\n    true, M = { {1.0 COMMA 2.0} COMMA {3.0 COMMA 4.0} });\n  should_fail = true;\n  EVAL(\"Matrix assignment to initializer list with overfill (SHOULD FAIL)\", myMatrix, M,\n    true, M = { {1.0 COMMA 2.0 COMMA 3.0 COMMA 4.0} });\n  should_fail = false;\n  EVAL(\"Initializer list in expression\", myVector, v,\n    true, v = v + Vector({1.0 COMMA 2.0 COMMA 3.0}));\n  EVAL2(\"Indexed matrix assigned to initializer list\", myMatrix, M, true, intVector, index, \n\tM(index,index) = {{1 COMMA 2} COMMA {3 COMMA 4}});\n\n#endif\n\n\n  HEADING(\"BASIC EXPRESSIONS\");\n  EVAL2(\"Vector assignment to vector from empty\", myVector, v, false, myVector, w, v = w);\n  EVAL2(\"Vector assignment to expression from empty\", myVector, v, false, myVector, w, v = log(w) + 1.0);\n\n  /*\n  should_fail=true;\n  EVAL(\"Vector = operator from empty (SHOULD FAIL)\", myVector, v, false, v = 1.0);\n  EVAL(\"Vector += operator from empty (SHOULD FAIL)\", myVector, v, false, v += 1.0);\n  should_fail=false;\n  */\n\n  EVAL(\"Matrix *= operator\", myMatrix, M, 
true, M *= 0.5);\n  EVAL2(\"Matrix = scalar\", myMatrix, M, true, myReal, x, M = x);\n  EVAL2(\"Matrix = scalar expression\", myMatrix, M, true, myReal, x, M = (10.0*x));\n#ifndef ALL_COMPLEX\n  HEADING(\"BASIC FUNCTIONS\");\n  EVAL2(\"max\", myVector, v, true, myVector, w, v = max(v,w/3.0));\n  EVAL2(\"min\", myVector, v, true, myVector, w, v = min(v,w/3.0));\n#endif\n\n  HEADING(\"ARRAY SLICING\");\n  EVAL2(\"Array indexing rvalue\", myReal, x, true, myMatrix, M, x = M(1,end-1));\n\n  should_fail=true;\n  EVAL2(\"Array indexing rvalue out of range (SHOULD FAIL)\", myReal, x, true, myMatrix, M, x = M(1,3));\n  should_fail=false;\n\n  EVAL(\"Array indexing lvalue\", myMatrix, M, true, M(1,end-1) *= -1.0);\n\n  EVAL2(\"contiguous subarray rvalue\", myVector, v, false, myMatrix, M, v = M(__,end));\n  EVAL(\"contiguous subarray lvalue\", myMatrix, M, true, M(end-1,__) /= 2.0);\n  EVAL2(\"contiguous subarray rvalue and lvalue\", myMatrix, M, true, myMatrix, N, M(__,1) = N(__,2));\n  EVAL2(\"contiguous subarray rvalue using range\", myVector, v, false, myMatrix, M, v = 2.0 * M(1,range(1,2)));\n  EVAL2(\"contiguous subarray lvalue using range\", myMatrix, M, true, myVector, v, M(end-1,range(0,1)) = log(v(range(1,2))));\n  EVAL2(\"contiguous subarray rvalue using subset\", myMatrix, M, false, myMatrix, N, M = 2.0 * N.subset(1,1,1,2));\n  EVAL(\"contiguous subarray lvalue using subset\", myVector, v, true, v.subset(end-1,end) *= 10.0);\n  EVAL2(\"regular subarray rvalue\", myVector, v, false, myVector, w, v = w(stride(end,0,-1)));\n  EVAL2(\"regular subarray lvalue\", myMatrix, M, true, myVector, w, M(0,stride(0,end,2)) *= w(stride(end,0,-2)));\n#ifndef ALL_COMPLEX\n  EVAL2(\"irregular subarray rvalue\", myMatrix, M, false, myMatrix, N, M = N(stride(1,0,-1),find(N(0,__)>18)));\n  EVAL(\"irregular subarray lvalue\", myMatrix, M, true, M(stride(1,0,-1),find(M(0,__)>4)) = 0);\n#endif\n  EVAL(\"slice leading dimension\", myMatrix, M, true, M[end] = 0);\n  EVAL(\"slice two 
dimensions\", myMatrix, M, true, M[end][0] = 0);\n  EVAL2(\"diag_vector member function as rvalue\", myVector, v, false, myMatrix, S, v = diag_vector(S,1));\n  EVAL2(\"diag_vector member function as lvalue\", myMatrix, S, true, myVector, v, S.diag_vector() += v);\n  EVAL2(\"diag_matrix member function\", myMatrix, S, false, myVector, v, S = v.diag_matrix());\n  EVAL2(\"diag_matrix external function\", myMatrix, S, false, myVector, v, S = diag_matrix(v));\n  EVAL2(\"transpose as rvalue via T member function\", myMatrix, N, false, myMatrix, M, N = 2.0 * M.T());\n  EVAL2(\"transpose as rvalue via permute member function\", myMatrix, N, false, myMatrix, M, N = 2.0 * M.permute(1,0));\n  EVAL3(\"matrix indexing (scalar,non-contiguous)\", myVector, v, false, myMatrix, N, intVector, index, v = N(1,index)); \n  EVAL3(\"matrix indexing (non-contiguous,scalar)\", myVector, v, false, myMatrix, N, intVector, index, v = N(index,1)); \n  EVAL3(\"2D arbitrary index as rvalue\", myMatrix, M, false, myMatrix, N, intVector, index, M = const_cast<const myMatrix&>(N)(index,index));\n  EVAL3(\"2D arbitrary index as lvalue assigned to scalar expression\", myMatrix, M, true, myMatrix, N, intVector, index, M(index,index) = 2.0*(myReal)(4.0));\n  EVAL3(\"2D arbitrary index as lvalue\", myMatrix, M, true, myMatrix, N, intVector, index, M(index,index) = N(__,range(1,2)));\n  EVAL2(\"2D arbitrary index as lvalue with assign-multiply operator\", myMatrix, M, true, intVector, index, M(index,index) *= 10.0);\n  EVAL2(\"2D arbitrary index as lvalue with aliased right-hand-side\", myMatrix, M, true, intVector, index, M(index,index) = M(__,range(0,1)));\n  EVAL2(\"2D arbitrary index as lvalue with aliased right-hand-side and eval function\", myMatrix, M, true, intVector, index, M(index,index) = eval(M(__,range(0,1))));\n  EVAL2(\"reshape member function\", myMatrix, M, false, myVector, vlong, M >>= vlong.reshape(3,4));\n  should_fail=true;\n  EVAL2(\"reshape member function with invalid 
dimensions\", myMatrix, M, false, myVector, vlong, M >>= vlong.reshape(5,5));\n  should_fail=false;\n  EVAL(\"end/2 indexing\", myVector, vlong, true, vlong(range(end/2,end)) = 0.0);\n  EVAL(\"end/2 indexing\", myVector, vlong, true, vlong(range(0,end/2)) = 0.0);\n  EVAL(\"end/2 indexing\", myVector, vlong, true, vlong.subset(end/2,end) = 0.0);\n\n  HEADING(\"REDUCTION OPERATIONS\"); \n  EVAL2(\"full sum\", myReal, x, true, myMatrix, M, x = sum(M));\n  EVAL2(\"full mean\", myReal, x, true, myMatrix, M, x = mean(M));\n  EVAL2(\"full product\", myReal, x, true, myMatrix, M, x = product(M));\n  EVAL2(\"full norm2\", myReal, x, true, myMatrix, M, x = norm2(M));\n#ifndef ALL_COMPLEX\n  EVAL2(\"full maxval\", myReal, x, true, myMatrix, M, x = maxval(M));\n  EVAL2(\"full minval\", myReal, x, true, myMatrix, M, x = minval(-M));\n#endif\n  \n  EVAL2(\"1-dimension sum\", myVector, v, true, myMatrix, M, v += sum(M,0));\n  EVAL2(\"1-dimension mean\", myVector, v, false, myMatrix, M, v = mean(M*M,1));\n  EVAL2(\"1-dimension product\", myVector, v, false, myMatrix, M, v = product(M,1));\n  EVAL2(\"1-dimension norm2\", myVector, v, false, myMatrix, M, v = norm2(M,1));\n  //  EVAL2(\"1-dimension sum\", myMatrix, M, false, myArray3D, A, M = sum(A,2));\n#ifndef ALL_COMPLEX\n  EVAL2(\"1-dimension maxval\", myVector, v, false, myMatrix, M, v = maxval(M,1));\n  EVAL2(\"1-dimension minval\", myVector, v, false, myMatrix, M, v = minval(M,1));\n\n  EVAL2(\"dot product\", myReal, x, true, myVector, w, x = dot_product(w,w(stride(end,0,-1))));\n  EVAL2(\"dot product on expressions\", myReal, x, true, myVector, w, x = dot_product(2.0*w,w(stride(end,0,-1))+1.0));\n  EVAL2(\"1D interpolation\", myVector, v, true, myVector, w, v = interp(value(v), w, Vector(value(w)/2.0)));\n  EVAL2(\"1D clamped interpolation\", myVector, v, true, myVector, w, v = interp(value(v), w, value(w)/2.0, ADEPT_EXTRAPOLATE_CLAMP));\n#ifndef ALL_ACTIVE\n  EVAL2(\"1D interpolation of matrix\", myMatrix, M, true, myVector, 
v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0));\n  EVAL2(\"1D clamped interpolation of matrix\", myMatrix, M, true, myVector, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0, ADEPT_EXTRAPOLATE_CLAMP));\n#endif\n  EVAL2(\"all reduction\", bool, b, true, myMatrix, M, b = all(M > 8.0));\n  EVAL2(\"any reduction\", bool, b, true, myMatrix, M, b = any(M > 8.0));\n  EVAL2(\"count reduction\", int, c, true, myMatrix, M, c = count(M > 8.0));\n  EVAL2(\"1-dimension all reduction\", boolVector, B, false, myMatrix, M, B = all(M > 8.0, 1));\n  EVAL2(\"1-dimension any reduction\", boolVector, B, false, myMatrix, M, B = any(M > 8.0, 1));\n  EVAL2(\"1-dimension count reduction\", intVector, index, false, myMatrix, M, index = count(M > 8.0, 1));\n  HEADING(\"CONDITIONAL OPERATIONS\");\n  EVAL2(\"where construct, scalar right-hand-side\", myMatrix, M, true, myMatrix, N, M.where(N > 20) = 0);\n  EVAL2(\"where construct, expression right-hand-side\", myMatrix, M, true, myMatrix, N, M.where(N > 20) = -N);\n  EVAL2(\"where construct, scalar either-or right-hand-side\", myMatrix, M, true, myMatrix, N, M.where(N > 20) = either_or(0,1));\n  EVAL2(\"where construct, expression either-or right-hand-side\", myMatrix, M, true, myMatrix, N, M.where(N > 20) = either_or(-N,N));\n  EVAL_NO_TRAP(\"find construct, scalar right-hand-side\", myVector, v, true, v(find(v > 3.5)) = 0);\n  EVAL(\"find construct, expression right-hand-side\", myVector, v, true, v(find(v > 3.5)) = -v(range(end,end)));\n  EVAL(\"find construct, multiply-assign right-hand-side\", myVector, v, true, v(find(v != 5.0)) *= 10.0);\n#endif\n  HEADING(\"SPECIAL SQUARE MATRICES\");\n  EVAL(\"SymmMatrix \\\"resize\\\" member function\", mySymmMatrix, O, true, O.resize(5));\n\n  should_fail = true;\n  EVAL(\"SymmMatrix \\\"resize\\\" with invalid dimensions\", mySymmMatrix, O, true, O.resize(4,5));\n  should_fail = false;\n\n  EVAL(\"SymmMatrix \\\"clear\\\" member function\", mySymmMatrix, O, true, O.clear());\n  
EVAL2(\"SymmMatrix assign from dense matrix\", mySymmMatrix, O, false, myMatrix, S, O = S);\n  EVAL2(\"DiagMatrix assign from dense matrix\", myDiagMatrix, D, false, myMatrix, S, D = S);\n  EVAL2(\"TridiagMatrix assign from dense matrix\", myTridiagMatrix, T, false, myMatrix, S, T = S);\n  EVAL2(\"LowerMatrix assign from dense matrix\", myLowerMatrix, L, false, myMatrix, S, L = S);\n  EVAL2(\"UpperMatrix assign from dense matrix\", myUpperMatrix, U, false, myMatrix, S, U = S);\n  EVAL(\"SymmMatrix += operator\", mySymmMatrix, O, true, O += 3.0);\n  EVAL(\"DiagMatrix += operator\", myDiagMatrix, D, true, D += 3.0);\n  EVAL(\"TridiagMatrix += operator\", myTridiagMatrix, T, true, T += 3.0);\n  EVAL(\"LowerMatrix += operator\", myLowerMatrix, L, true, L += 3.0);\n  EVAL(\"UpperMatrix += operator\", myUpperMatrix, U, true, U += 3.0);\n  EVAL2(\"SymmMatrix as rvalue\", myMatrix, M, false, mySymmMatrix, O, M = O);\n  EVAL2(\"DiagMatrix as rvalue\", myMatrix, M, false, myDiagMatrix, D, M = D);\n  EVAL2(\"TridiagMatrix as rvalue\", myMatrix, M, false, myTridiagMatrix, T, M = T);\n  EVAL2(\"LowerMatrix as rvalue\", myMatrix, M, false, myLowerMatrix, L, M = L);\n  EVAL2(\"UpperMatrix as rvalue\", myMatrix, M, false, myUpperMatrix, U, M = U);\n  EVAL(\"SymmMatrix assign from scalar expression\", mySymmMatrix, O, true, O = 2.0*(myReal)(4.0));\n  EVAL(\"UpperMatrix assign from scalar expression\", myUpperMatrix, U, true, U = 2.0*(myReal)(4.0));\n\n\n  EVAL(\"SymmMatrix diag_vector member function as lvalue (upper)\", mySymmMatrix, O, true, O.diag_vector(1) = 0);\n  EVAL(\"SymmMatrix diag_vector member function as lvalue (lower)\", mySymmMatrix, O, true, O.diag_vector(-2) += 10.0);\n  EVAL(\"DiagMatrix diag_vector member function as lvalue\", myDiagMatrix, D, true, D.diag_vector() = 0.0);\n\n  should_fail = true;\n  EVAL(\"DiagMatrix diag_vector member function incorrectly using offdiagonal\", myDiagMatrix, D, true, D.diag_vector(1) = 0.0);\n  should_fail = false;\n\n  
EVAL(\"TridiagMatrix diag_vector member function as lvalue (upper)\", myTridiagMatrix, T, true, T.diag_vector(1) += 10.0);\n  EVAL(\"TridiagMatrix diag_vector member function as lvalue (lower)\", myTridiagMatrix, T, true, T.diag_vector(-1) = 0.0);\n  EVAL(\"LowerMatrix diag_vector member function as lvalue (lower)\", myLowerMatrix, L, true, L.diag_vector(-1) = 0.0);\n\n  should_fail = true;\n  EVAL(\"LowerMatrix diag_vector member function as lvalue (upper)\", myLowerMatrix, L, true, L.diag_vector(1) = 0.0);\n  EVAL(\"UpperMatrix diag_vector member function as lvalue (lower)\", myUpperMatrix, U, true, U.diag_vector(-1) = 0.0);\n  should_fail = false;\n\n  EVAL(\"UpperMatrix diag_vector member function as lvalue (upper)\", myUpperMatrix, U, true, U.diag_vector(1) = 0.0);\n  EVAL(\"Odd band matrix \\\"diag_vector\\\" member function\", myOddBandMatrix, Q, true, Q.diag_vector(1) = -1.0);\n  EVAL(\"Odd band matrix \\\"diag_vector\\\" member function\", myOddBandMatrix, Q, true, Q.diag_vector(0) = -1.0);\n  EVAL(\"Odd band matrix \\\"diag_vector\\\" member function\", myOddBandMatrix, Q, true, Q.diag_vector(-1) = -1.0);\n  EVAL(\"Odd band matrix \\\"diag_vector\\\" member function\", myOddBandMatrix, Q, true, Q.diag_vector(-2) = -1.0);\n\n  EVAL2(\"Array submatrix_on_diagonal member function\", myMatrix, M, false, myMatrix, S, M = S.submatrix_on_diagonal(1,2));\n  EVAL(\"Array submatrix_on_diagonal member function as lvalue\", myMatrix, S, true, S.submatrix_on_diagonal(0,1) = 0.0);\n\n  should_fail = true;\n  EVAL2(\"Array submatrix_on_diagonal member function to non-square matrix\", myMatrix, M, false, myMatrix, N, M = N.submatrix_on_diagonal(1,2));\n  should_fail = false;\n\n  EVAL2(\"SymmMatrix submatrix_on_diagonal member function\", mySymmMatrix, P, false, mySymmMatrix, O, P = O.submatrix_on_diagonal(1,2));\n  EVAL2(\"DiagMatrix submatrix_on_diagonal member function\", myDiagMatrix, E, false, myDiagMatrix, D, E = D.submatrix_on_diagonal(1,2));\n  
EVAL2(\"TridiagMatrix submatrix_on_diagonal member function\", myTridiagMatrix, TT, false, myTridiagMatrix, T, TT = T.submatrix_on_diagonal(1,2));\n  EVAL2(\"LowerMatrix submatrix_on_diagonal member function\", myLowerMatrix, LL, false, myLowerMatrix, L, LL = L.submatrix_on_diagonal(1,2));\n  EVAL2(\"UpperMatrix submatrix_on_diagonal member function\", myUpperMatrix, UU, false, myUpperMatrix, U, UU = U.submatrix_on_diagonal(1,2));\n  EVAL2(\"Odd band matrix submatrix_on_diagonal member function\", myOddBandMatrix, R, false, myOddBandMatrix, Q, R = Q.submatrix_on_diagonal(1,3));\n  EVAL(\"Odd band matrix submatrix_on_diagonal as lvalue\", myOddBandMatrix, Q, true, Q.submatrix_on_diagonal(1,3) = -1);\n  EVAL2(\"SymmMatrix transpose as rvalue via T member function\", mySymmMatrix, P, false, mySymmMatrix, O, P = O.T());\n  EVAL2(\"DiagMatrix transpose as rvalue via T member function\", myDiagMatrix, E, false, myDiagMatrix, D, E = D.T());\n  EVAL2(\"TridiagMatrix transpose as rvalue via T member function\", myTridiagMatrix, TT, false, myTridiagMatrix, T, TT = T.T());\n  EVAL2(\"LowerMatrix transpose as rvalue via T member function\", myUpperMatrix, U, false, myLowerMatrix, L, U = L.T());\n  EVAL2(\"UpperMatrix transpose as rvalue via T member function\", myLowerMatrix, L, false, myUpperMatrix, U, L = U.T());\n\n  HEADING(\"EXPANSION OPERATIONS\");\n  EVAL2(\"Outer product\", myMatrix, M, false, myVector, v, M = outer_product(v,v));\n  EVAL2(\"Outer product on indexed array\", myMatrix, M, false, myVector, v, M = outer_product(v,v(stride(end,0,-1))));\n  EVAL2(\"Outer product on expressions\", myMatrix, M, false, myVector, v, M = outer_product(2.0*v,v-1.0));\n  EVAL2(\"Vector spread of dimension 0\", myMatrix, M, false, myVector, v, M = spread<0>(v,2));\n  EVAL2(\"Vector spread of dimension 1\", myMatrix, M, false, myVector, v, M = spread<1>(v,2));\n  EVAL2(\"Vector spread with expression argument\", myMatrix, M, false, myVector, v, M = spread<1>(v*2.0,2));\n  
EVAL2(\"Matrix spread of dimension 0\", myArray3D, A, false, myMatrix, M, A = spread<0>(M,2));\n  EVAL2(\"Matrix spread of dimension 1\", myArray3D, A, false, myMatrix, M, A = spread<1>(M,2));\n  EVAL2(\"Matrix spread of dimension 2\", myArray3D, A, false, myMatrix, M, A = spread<2>(M,2));\n\n#ifndef ALL_COMPLEX\n\n#ifndef MARVEL_STYLE\n  if (adept::have_matrix_multiplication()) {\n    HEADING(\"MATRIX MULTIPLICATION\");\n    EVAL3(\"Matrix-Vector multiplication\", myVector, w, false, myMatrix, M, myVector, v, w = M ** v);\n    EVAL3(\"Matrix-Vector multiplication with strided matrix\", myVector, w, false, myMatrix, Mstrided, myVector, v, w = Mstrided ** v);\n    EVAL2(\"Matrix-Matrix multiplication\", myMatrix, M, false, myMatrix, N, M = N.T() ** N);\n    EVAL2(\"Matrix-Matrix multiplication with matmul\", myMatrix, M, false, myMatrix, N, M = matmul(N.T(), N));\n    \n    should_fail = true;\n    EVAL2(\"Matrix-Matrix multiplication with inner dimension mismatch\", myMatrix, M, false, myMatrix, N, M = N ** N);\n    should_fail = false;\n    \n    // TESTING!\n    EVAL2(\"Matrix-Matrix-Vector multiplication\", myVector, v, true, myMatrix, S, v = S ** S ** v);\n    \n    EVAL2(\"Matrix-Matrix-Vector multiplication\", myVector, v, false, myMatrix, S, v = S ** log(S) ** S(0,__));\n    EVAL2(\"Vector-Matrix multiplication\", myVector, v, true, myMatrix, S, v = v ** S);\n    EVAL2(\"Vector-Matrix multiplication with matmul\", myVector, v, true, myMatrix, S, v = matmul(v, S));\n    EVAL2(\"SymmMatrix-Vector multiplication\", myVector, v, true, mySymmMatrix, O, v = O ** v);\n    EVAL2(\"SymmMatrix-Matrix multiplication\", myMatrix, S, true, mySymmMatrix, O, S = O ** S);\n    EVAL2(\"Vector-SymmMatrix multiplication\", myVector, v, true, mySymmMatrix, O, v = v ** O);\n    EVAL2(\"Matrix-SymmMatrix multiplication\", myMatrix, M, true, mySymmMatrix, O, M = M ** O);\n    EVAL2(\"DiagMatrix-Vector multiplication\", myVector, v, true, myDiagMatrix, D, v = D ** v);\n    
EVAL2(\"TridiagMatrix-Vector multiplication\", myVector, v, true, myTridiagMatrix, T, v = T ** v);\n    EVAL2(\"TridiagMatrix-Matrix multiplication\", myMatrix, S, true, myTridiagMatrix, T, S = T ** S);\n    \n    EVAL2(\"LowerMatrix-Matrix multiplication\", myMatrix, S, true, myLowerMatrix, L, S = L ** S);\n    \n    EVAL2(\"Vector-TridiagMatrix multiplication\", myVector, v, true, myTridiagMatrix, T, v = v ** T);\n    EVAL2(\"Matrix-TridiagMatrix multiplication\", myMatrix, M, true, myTridiagMatrix, T, M = M ** T);\n  }\n  else {\n    std::cout << \"NO MATRIX MULTIPLICATION TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\\n\";\n  }\n    \n#ifndef ALL_ACTIVE\n  if (adept::have_linear_algebra()) {\n    HEADING(\"LINEAR ALGEBRA\");\n    EVAL2(\"Solving general linear equations Ax=b\", myVector, v, true, myMatrix, S, v = solve(S,v));\n    EVAL2(\"Solving general linear equations Ax=b with expression arguments\", myVector, v, true, myMatrix, S, v = solve(S,2*v));\n    \n    EVAL2(\"Solving general linear equations AX=B\", myMatrix, M, true, myMatrix, S, M.T() = solve(S,M.T()));\n    EVAL2(\"Solving general linear equations AX=B with expression arguments\", myMatrix, M, true, myMatrix, S, M.T() = solve(2.0 * S,2.0 * M.T()));\n    EVAL2(\"Solving linear equations Ax=b with symmetric A\", myVector, v, true, mySymmMatrix, O, v = solve(O,v));\n    EVAL2(\"Solving linear equations AX=B with symmetric A\", myMatrix, M, true, mySymmMatrix, O, M.T() = solve(O,M.T()));\n    EVAL3(\"Solving linear equations AX=B with symmetric A and B\", myMatrix, S, false, mySymmMatrix, O, mySymmMatrix, P, S = solve(O,P));\n    EVAL2(\"Solving linear equations Ax=b with upper-triangular A\", myVector, v, true, myUpperMatrix, U, v = solve(U,v));\n    EVAL2(\"Invert general matrix\", myMatrix, M, false, myMatrix, S, M = inv(S));\n    EVAL2(\"Invert symmetric matrix\", mySymmMatrix, P, false, mySymmMatrix, O, P = inv(O));\n  }\n  else {\n    std::cout << \"NO LINEAR ALGEBRA TESTS PERFORMED 
BECAUSE ADEPT COMPILED WITHOUT LAPACK\\n\";\n  }\n#else\n  std::cout << \"NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ACTIVE ARRAYS NOT YET SUPPORTED\\n\";\n#endif\n#else\n  std::cout << \"NO MATRIX TESTS PERFORMED BECAUSE USING MARVEL-STYLE ACTIVE ARRAYS\\n\";\n#endif\n\n#endif\n\n  HEADING(\"FILLING ARRAYS\");\n  EVAL(\"Fill vector with \\\"<<\\\"\", myVector, v, true, (v << 0.1, 0.2));\n\n  should_fail = true;\n  EVAL(\"Overfill vector with \\\"<<\\\"\", myVector, v, true, (v << 0.1, 0.2, 0.3, 0.4));\n  should_fail = false;\n\n  EVAL(\"Underfill matrix with \\\"<<\\\"\", myMatrix, M, true, (M << 0.1, 0.2, 0.3, 0.4, 0.5));\n  EVAL(\"Fill matrix with \\\"<<\\\"\", myMatrix, M, true, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6));\n\n  should_fail = true;\n  EVAL(\"Overfill matrix with \\\"<<\\\"\", myMatrix, M, true, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0));\n  should_fail = false;\n\n  EVAL(\"Fill vector with vectors using \\\"<<\\\"\", myVector, v, true, v << v(range(1,2)) << 0.1);\n  EVAL2(\"Fill matrix with vector using \\\"<<\\\"\", myMatrix, M, true, myVector, v, M << 0.1 << 0.2 << 0.3 << v);\n  EVAL2(\"Fill matrix with vector using \\\"<<\\\"\", myMatrix, S, true, myVector, v, S << v << v << v);\n  EVAL(\"Assign array using range\", myVector, v, false, v = range(3,6));\n\n  HEADING(\"PRINTING WITH PLAIN STYLE\");\n  adept::set_array_print_style(PRINT_STYLE_PLAIN);\n  SIMPLE_EVAL(\"Printing empty vector\", myVector, v, false, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing vector\", myVector, v, true, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing matrix\", myMatrix, M, true, std::cout << M << '\\n');\n  SIMPLE_EVAL(\"Printing 3D array\", myArray3D, A, true, std::cout << A << '\\n');\n\n  HEADING(\"PRINTING WITH CSV STYLE\");\n  adept::set_array_print_style(PRINT_STYLE_CSV);\n  SIMPLE_EVAL(\"Printing empty vector\", myVector, v, false, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing vector\", myVector, v, true, std::cout << v << '\\n');\n  
SIMPLE_EVAL(\"Printing matrix\", myMatrix, M, true, std::cout << M << '\\n');\n  SIMPLE_EVAL(\"Printing 3D array\", myArray3D, A, true, std::cout << A << '\\n');\n\n  HEADING(\"PRINTING WITH CURLY STYLE\");\n  adept::set_array_print_style(PRINT_STYLE_CURLY);\n  SIMPLE_EVAL(\"Printing empty vector\", myVector, v, false, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing vector\", myVector, v, true, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing matrix\", myMatrix, M, true, std::cout << M << '\\n');\n  SIMPLE_EVAL(\"Printing 3D array\", myArray3D, A, true, std::cout << A << '\\n');\n\n  HEADING(\"PRINTING WITH MATLAB STYLE\");\n  adept::set_array_print_style(PRINT_STYLE_MATLAB);\n  SIMPLE_EVAL(\"Printing empty vector\", myVector, v, false, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing vector\", myVector, v, true, std::cout << v << '\\n');\n  SIMPLE_EVAL(\"Printing matrix\", myMatrix, M, true, std::cout << M << '\\n');\n  SIMPLE_EVAL(\"Printing 3D array\", myArray3D, A, true, std::cout << A << '\\n');\n  adept::set_array_print_style(PRINT_STYLE_CURLY);\n\n  HEADING(\"EXPRESSION PRINTING\");\n  EVAL(\"Send expression to standard output\", myMatrix, M, true,\n       std::cout << M(0,__) + M(1,__) << '\\n');\n  EVAL(\"Send scalar expression to standard output\", myVector, v, true,\n       std::cout << v(0) + v(1) << '\\n');\n\n#ifdef ADEPT_BOUNDS_CHECKING\n  HEADING(\"BOUNDS CHECKING\");\n  should_fail = true;\n  EVAL(\"Access vector out of bounds\", myVector, v, true, v(0) = v(4));\n  EVAL(\"Access vector out of bounds\", myVector, v, true, v(0) = v(end-4));\n  EVAL(\"Access matrix out of bounds\", myMatrix, M, true, M(0,0) = M(0,-1));\n  EVAL(\"Access matrix out of bounds\", myMatrix, M, true, M(0,0) = M(end+1,1));\n  should_fail = false;\n#endif\n\n  std::cout << \"====================================================================\\n\";\n#ifdef ALL_ACTIVE\n  std::cout << stack;\n  std::cout << 
\"====================================================================\\n\";\n#endif\n\n  if (anomalous_results > 0) {\n    std::cout << \"*** In terms of run-time errors, there were \" << anomalous_results << \" incorrect results\\n\";\n  }\n  else {\n    std::cout << \"In terms of run-time errors, all tests were passed\\n\";\n  }\n\n#ifdef ALL_ACTIVE\n#ifdef ADEPT_RECORDING_PAUSABLE\n  if (stack.n_statements() > 1) {\n    std::cout << \"*** Stack contains \" << stack.n_statements()-1\n\t      << \" statements and \" << stack.n_operations()\n\t      << \" operations but both should be 0 because recording has been paused\\n\";\n    return 1;\n  }\n#endif\n#endif\n  if (anomalous_results > 0) {\n    return 1;\n  }\n  else {\n    return 0;\n  }\n}\n"
  },
  {
    "path": "test/test_checkpoint.cpp",
    "content": "/* test_checkpoint.cpp - Test manual checkpointing of a simulation\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <iostream>\n#include <cmath>\n//#include <fenv.h>\n\n#include \"adept.h\"\n// This header file is in the same directory as adept.h in the Adept\n// package\n#include \"Timer.h\"\n\nusing adept::adouble;\n\n// Number of points in spatial grid of simulation\n#define NX 100\n\n// \"Toon\" advection scheme applied to linear advection in a 1D\n// periodic domain - see Adept paper for details\nstatic\nvoid\ntoon(int nt, double c, const adouble q_init[NX], adouble q[NX]) {\n  adouble flux[NX-1];                        // Fluxes between boxes\n  for (int i=0; i<NX; i++) q[i] = q_init[i]; // Initialize q\n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    for (int i=0; i<NX-1; i++) flux[i] = (exp(c*log(q[i]/q[i+1]))-1.0) \n                                         * q[i]*q[i+1] / (q[i]-q[i+1]);\n    for (int i=1; i<NX-1; i++) q[i] += flux[i-1]-flux[i];\n    q[0] = q[NX-2]; q[NX-1] = q[1];          // Treat boundary conditions\n  }\n}\n\n// Main program to test checkpointing\nint\nmain(int argc, char** argv)\n{\n  Timer timer;\n  timer.print_on_exit(true);\n\n  // Note that in single precision the derivative calculation causes a\n  // floating-point error due to negative overflow\n  //  feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);\n\n  const double pi = 4.0*atan(1.0);\n\n  // Edit these variables to change properties of simulation\n  const int nblocks = 100;   // Number of checkpoints\n  const int nt = 100;        // Number of timesteps between checkpoints\n  const double dt = 0.125;   // Timestep (actually a Courant number)\n\n  // Initial values of field as a 
double array\n  double q_init_save[NX];\n\n  // First initialize the field - note that the Toon function does not\n  // like identical values next to each other\n  for (int i = 0; i < NX; i++) {\n    q_init_save[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+0.0001;\n  }\n\n  // We perform the simulation twice, once without checkpointing and\n  // once with\n  int full_id = timer.new_activity(\"Non-checkpointed simulation\");\n  int checkpointed_id = timer.new_activity(\"Checkpointed simulation\");\n\n  bool nan_appeared = false;\n\n\n  // PART 1: NON-CHECKPOINTED SIMULATION\n  timer.start(full_id);\n  { \n    // Note that we run each test in a pair of curly brackets so that\n    // the Adept stack goes out of scope and is destructed before the\n    // next test is performed\n    std::cout << \"*** NON-CHECKPOINTED SIMULATION ***\\n\";\n\n    adept::Stack stack;\n\n    adouble q_init[NX];  // Initial values of field as adouble array\n    adouble q[NX];       // Final values \n\n    // Rate of change of cost function with respect to initial values\n    // of the field\n    double dJ_dq[NX];\n\n    // Copy initial values\n    for (int i = 0; i < NX; i++) {\n      q_init[i] = q_init_save[i];\n    }\n\n    // Run a simulation with nt*nblocks timesteps\n    stack.new_recording();\n    toon(nt*nblocks, dt, q_init, q);\n\n    // Define a \"cost function\" J that is the sum of squared\n    // differences between the final field and the initial field\n    adouble J = 0.0;\n    for (int i = 0; i < NX; i++) {\n      J += (q[i]-q_init_save[i])*(q[i]-q_init_save[i]);\n    }\n\n    // In order to get the gradients of the cost function with respect\n    // to the initial field, we first set the seed gradient of the\n    // cost function to unity\n    J.set_gradient(1.0);\n\n    // Perform adjoint calculation\n    stack.reverse();\n\n    // Extract the gradients\n    adept::get_gradients(q_init, NX, dJ_dq);\n  \n    // Print out the results\n    std::cout << \"J=\" << J << \"\\n\";\n    
std::cout << \"q_final=[\";\n    for (int i = 0; i < NX; i++) {\n      std::cout << \" \" << q[i];\n    }\n    std::cout << \"]\\n\";\n    std::cout << \"dJ_dq=[\";\n    for (int i = 0; i < NX; i++) {\n      std::cout << \" \" << dJ_dq[i];\n      nan_appeared = nan_appeared || std::isnan(dJ_dq[i]);\n    }\n    std::cout << \"]\\n\";\n    std::cout << stack;\n  }\n\n\n  // PART 2: CHECKPOINTED SIMULATION\n  timer.start(checkpointed_id);\n  {\n    std::cout << \"*** CHECKPOINTED SIMULATION ***\\n\";\n    adept::Stack stack;\n\n    // We save the field at each checkpoint, where 0 corresponds to\n    // the initial values and nblocks-1 corresponds to the final\n    // checkpoint (which is not the very final set of values of the\n    // field).  Note that this will only work if nblocks is non-const\n    // if you use gcc, which has a C++ extension to allow C99-style\n    // variable-length arrays.\n    adouble q_save[nblocks][NX];\n\n    // This will be the very final set of values of the field\n    adouble q[NX];\n\n    // Rate of change of cost function with respect to initial values\n    // of the field\n    double dJ_dq[NX];\n\n    // Copy initial values\n    for (int i = 0; i < NX; i++) {\n      q_save[0][i] = q_init_save[i];\n    }\n\n    // Run simulation in a set of blocks, saving the results each\n    // time. 
Note that this step does not need to be automatically\n    // differentiated, hence the use of pause_recording and\n    // continue_recording.\n    for (int i = 0; i < nblocks-1; i++) {\n      stack.pause_recording();\n      toon(nt, dt, q_save[i], q_save[i+1]);\n      stack.continue_recording();\n    }\n\n    // Now we rerun the simulations multiple times with automatic\n    // differentiation, each time stepping back to the previous block.\n    // The first simulation is treated separately since this is the\n    // one in which the gradient of the cost function is computed.\n    stack.new_recording();\n    toon(nt, dt, q_save[nblocks-1], q);\n\n    // Define a \"cost function\" J that is the sum of squared\n    // differences between the final field \"q\" and the initial field\n    adouble J = 0.0;\n    for (int i = 0; i < NX; i++) {\n      J += (q[i]-q_init_save[i])*(q[i]-q_init_save[i]);\n    }\n\n    // In order to get the gradients of the cost function with respect\n    // to the initial field, we first set the seed gradient of the\n    // cost function to unity\n    J.set_gradient(1.0);\n\n    // Perform adjoint calculation\n    stack.reverse();\n\n    // Extract the gradients of the cost function with respect to the\n    // values at the final checkpoint\n    adept::get_gradients(q_save[nblocks-1], NX, dJ_dq);\n\n    // Print out the simulation results (not yet the gradients)\n    std::cout << \"J=\" << J << \"\\n\";\n    std::cout << \"q_final=[\";\n    for (int i = 0; i < NX; i++) {\n      std::cout << \" \" << q[i];\n    }\n    std::cout << \"]\\n\";\n\n    // Now we repeat the simulation starting one checkpoint earlier\n    // each time, with the final simulation being performed starting\n    // at the initial values of the field\n    for (int i = nblocks-2; i >= 0; i--) {\n      stack.new_recording();\n      toon(nt, dt, q_save[i], q);\n\n      // This time we use the set of gradients output from the previous\n      // simulation (which can be thought 
of as dJ/dq_save[i+1]) as\n      // the input gradients for the next\n      adept::set_gradients(q, NX, dJ_dq);\n\n      // Perform adjoint calculation\n      stack.reverse();\n\n      // Extract the next set of gradients (which can be thought of as\n      // dJ/dq_save[i]) and place in dJ_dq ready for the next\n      // iteration\n      adept::get_gradients(q_save[i], NX, dJ_dq);\n    }\n\n    // Print out the gradients\n    std::cout << \"dJ_dq=[\";\n    for (int i = 0; i < NX; i++) {\n      std::cout << \" \" << dJ_dq[i];\n      nan_appeared = nan_appeared || std::isnan(dJ_dq[i]);\n    }\n    std::cout << \"]\\n\";\n    std::cout << stack;\n  }\n  timer.stop();\n\n  if (nan_appeared) {\n    std::cerr << \"*** Error: some NaNs appeared\\n\";\n    return 1;\n  }\n  else {\n    return 0;\n  }\n\n}\n"
  },
  {
    "path": "test/test_constructors.cpp",
    "content": "/* test_constructors.cpp - Test Adept's selection of constructors in a range of scenarios\n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <iostream>\n\n#define ADEPT_BOUNDS_CHECKING 1\n#define ADEPT_VERBOSE_FUNCTIONS\n#define ADEPT_NO_ALIAS_CHECKING\n\n#include <adept_arrays.h>\n\nusing namespace adept;\n\nVector square(const Vector& v) {\n  std::cout << \"  inside function\\n\";\n  return v*v;\n}\n\nvoid square_in_place(Vector& v) {\n  std::cout << \"  inside function\\n\";\n  v *= v;\n}\n\nVector square_copy(Vector v) {\n  std::cout << \"  inside function\\n\";\n  v *= -1.0;\n  return v*v;\n}\n\n#define COMMA ,\n\n#define EVAL_CONSTRUCT(MSG,X,COMMAND) std::cout << \"--------------------------------------------------------------------\\n\" \\\n  << MSG << \"\\n\" \\\n  << #COMMAND << \"\\n\"; \\\n  COMMAND; \\\n  std::cout << #X << \" = \" << X << \"\\n\"\n\n#define EVAL(MSG,X,COMMAND) std::cout << \"--------------------------------------------------------------------\\n\" \\\n  << MSG << \"\\n\" \\\n  << #X << \" = \" << X << \"\\n\" \\\n  << #COMMAND << \"\\n\"; \\\n  COMMAND; \\\n  std::cout << #X << \" = \" << X << \"\\n\"\n\n #define EVAL2(MSG,X,COMMAND,Y) std::cout << \"--------------------------------------------------------------------\\n\" \\\n  << MSG << \"\\n\" \\\n  << #X << \" = \" << X << \"\\n\" \\\n  << #COMMAND << \"\\n\"; \\\n  COMMAND;\t\t\t\t\t\\\n  std::cout << #X << \" = \" << X << \"\\n\" \\\n            << #Y << \" = \" << Y << \"\\n\"\n\n#define EVAL_FAIL(MSG,X,COMMAND) std::cout << \"--------------------------------------------------------------------\\n\" \\\n  << MSG << \"\\n\" \\\n  << 
#COMMAND << \"\\n\" \\\n  << \"DOES NOT COMPILE (INCORRECT BEHAVIOUR)\\n\"\n\n#define EVAL2_FAIL(MSG,X,COMMAND,Y) std::cout << \"--------------------------------------------------------------------\\n\" \\\n  << MSG << \"\\n\" \\\n  << #COMMAND << \"\\n\" \\\n  << \"DOES NOT COMPILE (INCORRECT BEHAVIOUR)\\n\"\n\n#define VERDICT98(MSG) std::cout << \"Verdict for C++98: \" << MSG << \"\\n\"\n#define VERDICT11(MSG) std::cout << \"Verdict for C++11: \" << MSG << \"\\n\"\n\n#define HEADING(MSG) std::cout << \"####################################################################\\n\" \\\n  << MSG << \"\\n\"\n\n\nint\nmain() {\n\n  Vector v(2), w(2), v_data(2), v_const_data(2);\n  v_data << 2, 3;\n  v_const_data << 5, 7;\n  v = v_data;\n  const Vector v_const = v_const_data;\n\n  adept::Stack stack;\n  stack.new_recording();\n\n  {\n  HEADING(\"COPY CONSTRUCTORS\");\n  EVAL2(\"Passing Vector as argument to Vector copy constructor\",\n\tv, const Vector v2(v), v2);\n  VERDICT98(\"correct\");\n  VERDICT11(\"should perform deep copy\");\n\n  EVAL2(\"Passing Vector as argument to const Vector copy constructor\",\n\tv, const Vector v_const(v), v_const);\n  VERDICT98(\"correct\");\n  VERDICT11(\"should perform deep copy\");\n\n  EVAL2(\"Passing const Vector as argument to const Vector copy constructor\",\n\tv_const, const Vector v_const2(v_const), v_const2);\n  VERDICT98(\"correct\");\n  VERDICT11(\"should perform deep copy\");\n\n  EVAL2(\"Passing const Vector as argument to Vector copy constructor\",\n\tv_const, Vector v3(v_const), v3);\n  VERDICT98(\"should not compile\");\n  VERDICT11(\"should perform deep copy\");\n  }\n\n#ifdef ADEPT_CXX11_FEATURES\n  HEADING(\"INITIALIZER LISTS\");\n  EVAL_CONSTRUCT(\"Construct Vector from initializer list of ints\",\n\tv1, Vector v1 = {1 COMMA 2 COMMA 3});\n  EVAL_CONSTRUCT(\"Construct Vector from initializer list of doubles\",\n\tv1d, Vector v1d = {1.0 COMMA 2.0 COMMA 3.0});\n  EVAL_CONSTRUCT(\"Construct Matrix from initializer 
list\",\n\t\t M, Matrix M = { {1 COMMA 2} COMMA {3} } );\n  EVAL_CONSTRUCT(\"Construct Array3D from initializer list\",\n\t\t A3, Array3D A3 = { { {1 COMMA 2} COMMA {3} } COMMA { { 4 } } } );\n  EVAL_CONSTRUCT(\"Construct FixedVector from initializer list\",\n\t\t fv1, Vector3 fv1 = {1 COMMA 2});\n  EVAL_CONSTRUCT(\"Construct FixedMatrix from initializer list\",\n\t\t fM, Matrix33 fM = { {1 COMMA 2} COMMA {3} } );\n  EVAL_CONSTRUCT(\"Construct FixedArray3D from initializer list\",\n\t\t fA3, FixedArray<double COMMA false COMMA 3 COMMA 3 COMMA 3> fA3 = { { {1 COMMA 2} COMMA {3} } COMMA { { 4 } } } );\n#endif\n\n  HEADING(\"ASSIGNMENT OPERATOR\");\n  EVAL2(\"Passing Vector to assignment operator\",\n\tv, w = v, w);\n  EVAL2(\"Passing const Vector to assignment operator\",\n\tv_const, w = v_const, w);\n  EVAL2(\"Passing Vector rvalue to assignment operator\",\n\tv, w = v(stride(1,0,-1)), w);\n  EVAL2(\"Passing const-Vector rvalue to assignment operator\",\n\tv_const, w = v_const(stride(1,0,-1)), w);\n  EVAL2(\"Passing Expression to assignment operator\",\n\tv, w = v+v, w);\n\n  HEADING(\"PASSING Vector TO FUNCTIONS\");\n  EVAL2(\"Passing Vector as argument to function taking const Vector&\",\n       v, w = square(v), w);\n  VERDICT98(\"too many copies\");\n  VERDICT11(\"could replace last copy with a move\");\n  EVAL(\"Passing Vector as argument to function taking Vector&\",\n       v, square_in_place(v));\n  VERDICT98(\"correct\");\n\n  v = v_data;\n  EVAL2(\"Passing Vector as argument to function taking Vector\",\n       v, w = square_copy(v), w);\n  VERDICT98(\"too many copies, unexpected change of argument\");\n  VERDICT11(\"should do deep copy on input, replace last copy with a move\");\n\n  /*\n\n    // Behaves same as passing non-const Vector, which is correct\n\n  // Passing const Vector\n  EVAL2(\"Passing const Vector as argument to function taking const Vector&\",\n       v_const, w = square(v_const), w);\n  // The following should not compile:\n  //  
EVAL(\"Passing const Vector as argument to function taking Vector&\",\n  //       v_const, square_in_place(v_const));\n  EVAL2(\"Passing const Vector as argument to function taking Vector\",\n       v_const, w = square_copy(v_const), w);\n\n  */\n\n\n  HEADING(\"LINKING\");\n  w.clear();\n  EVAL2(\"Linking to Vector\",\n\tv, w >>= v, w);\n\n  /*\n  w.clear();\n  // This should not compile\n  EVAL2(\"Linking to const Vector\",\n\tv_const, w >>= v_const, w);\n  */\n  w.clear();\n  EVAL2(\"Linking to Vector rvalue\",\n\tv, w >>= v(stride(1,0,-1)), w);\n\n  /*\n  // This should not compile\n  w.clear();\n  EVAL2(\"Linking to const-Vector rvalue\",\n\tv_const, w >>= v_const(stride(1,0,-1)), w);\n  */\n  /*\n    // This should not compile\n  w.clear();\n  EVAL2(\"Linking to Expression\",\n\tv, w >>= v+v, w);\n  VERDICT98(\"this doesn't make much sense\");\n  */\n\n  HEADING(\"PASSING Vector TO FUNCTIONS\");\n  EVAL2(\"Passing Vector as argument to function taking const Vector&\",\n       v, w = square(v), w);\n  VERDICT98(\"too many copies\");\n  VERDICT11(\"could replace last copy with a move\");\n  EVAL(\"Passing Vector as argument to function taking Vector&\",\n       v, square_in_place(v));\n  VERDICT98(\"correct\");\n\n  v = v_data;\n  EVAL2(\"Passing Vector as argument to function taking Vector\",\n       v, w = square_copy(v), w);\n  VERDICT98(\"too many copies, unexpected change of argument\");\n  VERDICT11(\"should do deep copy on input, replace last copy with a move\");\n\n\n  HEADING(\"PASSING Vector RVALUE TO FUNCTIONS\");\n  EVAL2(\"Passing Vector rvalue as argument to function taking const Vector&\",\n\tv, w = square(v(stride(1,0,-1))), w);\n  VERDICT98(\"correct\");\n  EVAL_FAIL(\"Passing Vector rvalue as argument to function taking Vector&\",\n       v, square_in_place(v(stride(1,0,-1))));\n  VERDICT98(\"Vector subset functions could return references?\");\n\n  v = v_data;\n  EVAL2(\"Passing Vector rvalue as argument to function taking Vector\",\n\t     
v, w = square_copy(v(stride(1,0,-1))), w);\n  VERDICT98(\"Vector subset functions could return references?\");\n  VERDICT11(\"Should use move function\");\n\n  HEADING(\"PASSING const Vector RVALUES TO FUNCTIONS\");\n  EVAL2(\"Passing const-Vector rvalue as argument to function taking const Vector&\",\n\tv_const, w = square(v_const(stride(1,0,-1))), w);\n  VERDICT98(\"correct\");\n  // This should not compile\n  //  EVAL(\"Passing const-Vector rvalue as argument to function taking Vector&\",\n  //       v_const, square_in_place(v_const(stride(1,0,-1))));\n  //  VERDICT98(\"Vector subset functions could return references?\");\n  EVAL2(\"Passing const-Vector rvalue as argument to function taking Vector\",\n\t     v_const, w = square_copy(v_const(stride(1,0,-1))), w);\n  VERDICT98(\"correct\");\n  //  VERDICT11(\"Should use move function\");\n\n  HEADING(\"PASSING Expression TO FUNCTIONS\");\n  EVAL2(\"Passing Expression as argument to function taking const Vector&\",\n       v, w = square(v+v), w);\n  VERDICT98(\"Unclear why copy-assignment + constructor needed\");\n  // This should not compile:\n  //  EVAL(\"Passing Expression as argument to function taking Vector&\",\n  //       v, square_in_place(v+v));\n  v = v_data;\n  EVAL2(\"Passing Expression as argument to function taking Vector\",\n       v, w = square_copy(v+v), w);\n  VERDICT98(\"Unclear why copy-assignment + constructor needed\");\n\n  return 0;\n}\n"
  },
  {
    "path": "test/test_derivatives.cpp",
    "content": "/* test_derivatives.cpp - Test derivatives of mathematical functions \n\n    Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n*/\n\n#include <adept_arrays.h>\n\n\n#define TEST_UNARY_FUNC(FUNC)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\\\n    std::cout << \"  Checking \" << #FUNC << \"... \\t\";\t\t\\\n    aVector x = x_save;\t\t\t\t\t\\\n    stack.new_recording();\t\t\t\t\t\\\n    aVector y = FUNC(x);\t\t\t\t\t\\\n    Vector dy_dx_num  = (FUNC(x_save+dx)-FUNC(x_save)) / dx;\t\\\n    Vector dy_dx_adept(N);\t\t\t\t\t\\\n    for (int i = 0; i < N; ++i) {\t\t\t\t\\\n      x[i].set_gradient(1.0);\t\t\t\t\t\\\n      stack.forward();\t\t\t\t\t\t\\\n      y[i].get_gradient(dy_dx_adept[i]);\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n    Real max_err\t\t\t\t\t\t\\\n      = maxval(abs(dy_dx_adept-dy_dx_num));\t\t\t\\\n    Real max_frac_err\t\t\t\t\t\t\\\n      = maxval(abs(dy_dx_adept-dy_dx_num)/dy_dx_adept);\t\t\\\n    if (max_err == 0) {\t\t\t\t\t\t\\\n      std::cout << \"max error = 0: PASSED\\n\";\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n    if (max_frac_err <= MAX_FRAC_ERR) {\t\t\t\t\\\n      std::cout << \"max fractional error = \" << max_frac_err\t\\\n\t\t<< \": PASSED\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\\\n      std::cout << \"max fractional error = \"\t\t\t\\\n\t\t<< max_frac_err << \": FAILED\\n\";\t\t\\\n      std::cout << \"    Adept     dy/dx = \"\t\t\t\\\n\t\t<< dy_dx_adept << \"\\n\";\t\t\t\t\\\n      std::cout << \"    Numerical dy/dx = \" << dy_dx_num << \"\\n\";\t\\\n      error_too_large = true;\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n  }\n\n#define TEST_BINARY_FUNC(FUNC)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\t\\\n    std::cout << \"  
Checking \" << #FUNC << \"... \\t\";\t\t\\\n    aVector x = x_save;\t\t\t\t\t\\\n    aVector y = y_save;\t\t\t\t\t\t\\\n    stack.new_recording();\t\t\t\t\t\\\n    aVector z = FUNC(x,y);\t\t\t\t\t\\\n    Vector dz_dx_num\t\t\t\t\t\t\\\n      = (FUNC(x_save+dx,y_save)-FUNC(x_save,y_save)) / dx; \\\n    Vector dz_dy_num\t\t\t\t\t\t\\\n      = (FUNC(x_save,y_save+dy)-FUNC(x_save,y_save)) / dy;\t\\\n    Vector dz_dx_adept(N);\t\t\t\t\t\\\n    Vector dz_dy_adept(N);\t\t\t\t\t\\\n    for (int i = 0; i < N; ++i) {\t\t\t\t\\\n      z[i].set_gradient(1.0);\t\t\t\t\t\\\n      stack.reverse();\t\t\t\t\t\t\\\n      x[i].get_gradient(dz_dx_adept[i]);\t\t\t\\\n      y[i].get_gradient(dz_dy_adept[i]);\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n    Real max_err\t\t\t\t\t\t\\\n      = std::max(maxval(abs(dz_dx_adept-dz_dx_num)),\t\t\\\n\t\t maxval(abs(dz_dy_adept-dz_dy_num)));\t\t\\\n    Real max_frac_err\t\t\t\t\t\t\\\n      = std::max(maxval(abs(dz_dx_adept-dz_dx_num)/dz_dx_adept),\t\\\n\t\t maxval(abs(dz_dy_adept-dz_dy_num)/dz_dy_adept));\t\\\n    if (max_err == 0) {\t\t\t\t\t\t\\\n      std::cout << \"max error = 0: PASSED\\n\";\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n    if (max_frac_err <= MAX_FRAC_ERR) {\t\t\t\t\\\n      std::cout << \"max fractional error = \" << max_frac_err\t\\\n\t\t<< \": PASSED\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\\\n      std::cout << \"max fractional error = \"\t\t\t\\\n\t\t<< max_frac_err << \": FAILED\\n\";\t\t\\\n      std::cout << \"    Adept     dz/dx = \" << dz_dx_adept << \"\\n\";\t\\\n      std::cout << \"    Adept     dz/dy = \" << dz_dy_adept << \"\\n\";\t\\\n      std::cout << \"    Numerical dz/dx = \" << dz_dx_num << \"\\n\";\t\\\n      std::cout << \"    Numerical dz/dy = \" << dz_dy_num << \"\\n\";\t\\\n      error_too_large = true;\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\\\n  }\n\n\nint\nmain(int argc, const char** argv) {\n  using namespace adept;\n\n  Stack stack;\n\n  static const int N             = 12;\n  static const 
Real MAX_FRAC_ERR = 1.0e-5;\n\n  Vector x_save(N);\n  x_save = 0.2;\n  x_save << 0.01, 0.4, 0.99;\n\n  Vector y_save(N);\n  y_save = 0.7;\n  y_save << 0.9, 0.6, 0.1, -0.1;\n\n  Real dx = 1.0e-8;\n\n  if (sizeof(Real) < 8) {\n    // Single precision only works with larger perturbations\n    dx = 1.0e-5;\n  }\n\n  Real dy = dx;\n\n  bool error_too_large = false;  \n\n  std::cout << \"EVALUATING UNARY FUNCTIONS\\n\";\n  std::cout << \"For functions of the form y=FUNC(x), where x=\" << x_save << \",\\n\";\n  std::cout << \"checking that fractional difference between dy/dx computed using Adept\\n\";\n  std::cout << \"and numerically by perturbing x by \" << dx << \" is less than \" << MAX_FRAC_ERR << \".\\n\";    \n\n  \n  TEST_UNARY_FUNC(-); // Unary minus\n  TEST_UNARY_FUNC(+); // Unary plus\n  TEST_UNARY_FUNC(log);\n  TEST_UNARY_FUNC(log10);\n  TEST_UNARY_FUNC(sin);\n  TEST_UNARY_FUNC(cos);\n  TEST_UNARY_FUNC(tan);\n  TEST_UNARY_FUNC(asin);\n  TEST_UNARY_FUNC(acos);\n  TEST_UNARY_FUNC(atan);\n  TEST_UNARY_FUNC(sinh);\n  TEST_UNARY_FUNC(cosh);\n  TEST_UNARY_FUNC(tanh);\n  TEST_UNARY_FUNC(abs);\n  TEST_UNARY_FUNC(fabs);\n  TEST_UNARY_FUNC(exp);\n  TEST_UNARY_FUNC(sqrt);\n  TEST_UNARY_FUNC(ceil);\n  TEST_UNARY_FUNC(floor);\n  TEST_UNARY_FUNC(log2);\n  TEST_UNARY_FUNC(expm1);\n  TEST_UNARY_FUNC(exp2);\n  TEST_UNARY_FUNC(log1p);\n  TEST_UNARY_FUNC(asinh);\n  TEST_UNARY_FUNC(acosh);\n  TEST_UNARY_FUNC(atanh);\n  TEST_UNARY_FUNC(erf);\n  TEST_UNARY_FUNC(erfc);\n  TEST_UNARY_FUNC(cbrt);\n  TEST_UNARY_FUNC(round);\n  TEST_UNARY_FUNC(trunc);\n  TEST_UNARY_FUNC(rint);\n  TEST_UNARY_FUNC(nearbyint);\n\n  std::cout << \"EVALUATING BINARY FUNCTIONS\\n\";\n  std::cout << \"For functions of the form z=FUNC(x,y), where x=\" << x_save << \",\\n\";\n  std::cout << \"and y=\" << y_save << \", checking that fractional difference between\\n\";\n  std::cout << \"dz/dx and dz/dy computed using Adept and numerically by perturbing\\n\";\n  std::cout << \"x and y by \" << dx << \" is less than 
\" << MAX_FRAC_ERR << \".\\n\";    \n\n  TEST_BINARY_FUNC(pow);\n  TEST_BINARY_FUNC(atan2);\n  TEST_BINARY_FUNC(max);\n  TEST_BINARY_FUNC(min);\n  TEST_BINARY_FUNC(fmax);\n  TEST_BINARY_FUNC(fmin);\n  TEST_BINARY_FUNC(copysign);\n\n\n  if (error_too_large) {\n    std::cerr << \"*** Error: fractional error in the derivatives of some functions too large\\n\";\n\n    if (sizeof(Real) < 8) {\n      std::cerr << \"*** (but you are using less than double precision so it is not surprising)\\n\";\n    }\n\n    return 1;\n  }\n  else {\n    return 0;\n  }\n}\n"
  },
  {
    "path": "test/test_fastexp.cpp",
    "content": "/* test_fastexp.cpp - Test Adept's fast exponential for correctness \n\n  Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n  This file tests Adept's fast exponential function \"fastexp\", which\n  is vectorizable.\n*/\n\n#include <iostream>\n#include <limits>\n#include \"adept_arrays.h\"\n\nusing namespace adept;\n\nint main(int argc, const char** argv)\n{\n  {\n    std::cout << \"DOUBLE PRECISION\\n\";\n    std::cout << \"Packet<double>::size = \" << internal::Packet<double>::size << \"\\n\";\n    Vector x = linspace(-750.0,750.0,128);\n    x(end) = std::numeric_limits<double>::quiet_NaN();\n    Vector exponential = exp(x);\n    Vector fast_exponential = fastexp(x);\n    Vector fractional_error = (fast_exponential - exponential) / exponential;\n    //    std::cout << fractional_error << \"\\n\";\n    Matrix M(128,4);\n    M(__,0) = x;\n    M(__,1) = exponential;\n    M(__,2) = fast_exponential;\n    M(__,3) = fractional_error;\n    std::cout << \"x  exp(x)  fastexp(x)  fractional-error\";\n    std::cout << M << \"\\n\";\n  }\n  {\n    std::cout << \"\\nSINGLE PRECISION\\n\";\n    std::cout << \"Packet<float>::size = \" << internal::Packet<float>::size << \"\\n\";\n    floatVector x = linspace(-100.0,100.0,128);\n    x(end) = std::numeric_limits<float>::quiet_NaN();\n    floatVector exponential = exp(x);\n    floatVector fast_exponential = fastexp(x);\n    floatVector fractional_error = (fast_exponential - exponential) / exponential;\n    floatMatrix M(128,4);\n    M(__,0) = x;\n    M(__,1) = exponential;\n    M(__,2) = fast_exponential;\n    M(__,3) = fractional_error;\n    std::cout << \"x  exp(x)  fastexp(x)  fractional-error\";\n    std::cout << M << \"\\n\";\n  }\n  return 0;\n}\n"
  },
  {
    "path": "test/test_fixed_arrays.cpp",
    "content": "/* test_arrays.cpp - Test Adept's array functionality\n\n    Copyright (C) 2016-2017 European Centre for Medium-Range Weather Forecasts\n\n    Author: Robin Hogan <r.j.hogan@ecmwf.int>\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include <iostream>\n\n#define ADEPT_BOUNDS_CHECKING 1\n\n#include <adept_arrays.h>\n#include <adept/FixedArray.h>\n\n// The following controls whether to use active variables or not\n//#define ALL_ACTIVE 1\n//#define MARVEL_STYLE 1\n\nusing namespace adept;\n\nint\nmain(int argc, const char** argv) {\n  using namespace adept;\n  Stack stack;\n  \n#define HEADING(MESSAGE) \\\n  std::cout << \"====================================================================\\n\"\t\\\n\t    << \"   TESTING \" << MESSAGE << \"\\n\"\n\n#define EVAL(MESSAGE, TYPE, X, EXPR)\t\t\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\\\n    TYPE X;\t\t\t\t\t\t\t\t\\\n    X = test. 
X;\t\t\t\t\t\t\t\\\n    std::cout << #TYPE << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n#define EVAL2(MESSAGE, TYPEX, X, TYPEY, Y, EXPR)\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    X = test. X;\t\t\t\t\t\t\t\\\n    std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    TYPEY Y; Y = test. 
Y;\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t        \\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n\n#define EVAL3(MESSAGE, TYPEX, X, TYPEY, Y, TYPEZ, Z, EXPR)\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  try {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    X = test. X;\t\t\t\t\t\t\t\\\n    std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    TYPEY Y; Y = test. Y;\t\t\t\t\t\t\\\n    TYPEZ Z; Z = test. 
Z;\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << #TYPEZ << \" \" << #Z << \" = \" << Z << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t        \\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  } catch (const adept::exception& e) {\t\t\t\t\t\\\n    std::cout << \"*** Failed with: \" << e.what() << \"\\n\";\t\t\\\n    if (!should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n    else {\t\t\t\t\t\t\t\t\\\n      std::cout << \"*** Correct behaviour\\n\";\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n#define EVAL_NO_TRAP(MESSAGE, TYPE, X, EXPR)\t\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    TYPE X;\t\t\t\t\t\t\t\t\\\n    X = test. X;\t\t\t\t\t\t\t\\\n    std::cout << #TYPE << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }  \n\n#define EVAL2_NO_TRAP(MESSAGE, TYPEX, X, TYPEY, Y, EXPR)\t\t\t\\\n  std::cout << \"--------------------------------------------------------------------\\n\" \\\n\t    << \"### \" << MESSAGE << \"\\n### \" << #EXPR << \"\\n\";\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    TYPEX X;\t\t\t\t\t\t\t\t\\\n    X = test. 
X;\t\t\t\t\t\t\t\\\n    std::cout << #TYPEX << \" \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    TYPEY Y; Y = test. Y;\t\t\t\t\t\t\\\n    std::cout << #TYPEY << \" \" << #Y << \" = \" << Y << \"\\n\";\t\t\\\n    std::cout << \"Evaluating \" << #EXPR << \"\\n\";\t\t\t\\\n    std::cout.flush();\t\t\t\t\t\t\t\\\n    EXPR;\t\t\t\t\t\t\t\t\\\n    std::cout << \"Result: \" << #X << \" = \" << X << \"\\n\";\t\t\\\n    if (should_fail) { std::cout << \"*** INCORRECT OUTCOME\\n\";\t\\\n      anomalous_results++;\t\t\t\t\t\t\\\n    }\t\t\t\t\t\t\t\t\t\\\n  }\n\n\n#ifdef ALL_ACTIVE\n#define IS_ACTIVE true\n#else\n#define IS_ACTIVE false\n#endif\n\n  typedef FixedArray<double,IS_ACTIVE,2> myVector2;\n  typedef FixedArray<double,IS_ACTIVE,3> myVector3;\n  typedef FixedArray<double,IS_ACTIVE,1,2> myMatrix12;\n  typedef FixedArray<double,IS_ACTIVE,3,3> myMatrix33;\n  typedef FixedArray<double,IS_ACTIVE,2,3> myMatrix23;\n  typedef FixedArray<double,IS_ACTIVE,3,2> myMatrix32;\n  typedef FixedArray<double,IS_ACTIVE,2,2> myMatrix22;\n\n#ifndef ALL_ACTIVE\n  typedef Real myReal;\n  typedef SymmMatrix mySymmMatrix;\n  typedef DiagMatrix myDiagMatrix;\n  typedef TridiagMatrix myTridiagMatrix;\n  typedef LowerMatrix myLowerMatrix;\n  typedef UpperMatrix myUpperMatrix;\n#else\n  typedef aReal myReal;\n  typedef aSymmMatrix mySymmMatrix;\n  typedef aDiagMatrix myDiagMatrix;\n  typedef aTridiagMatrix myTridiagMatrix;\n  typedef aLowerMatrix myLowerMatrix;\n  typedef aUpperMatrix myUpperMatrix;\n#endif\n\n  //  typedef SpecialMatrix<Real,SymmEngine<ROW_UPPER_COL_LOWER>,false> mySymmMatrix;\n  //  typedef SpecialMatrix<Real,BandEngine<COL_MAJOR,0,0>,false> myDiagMatrix;\n  //  typedef SpecialMatrix<Real,BandEngine<COL_MAJOR,1,1>,false> myTridiagMatrix;\n\n\n\n  struct Test {\n\n    myReal x;\n    myVector2 z;\n    myVector3 v, w;\n    myMatrix12 K;\n    myMatrix23 M, N;\n    myMatrix33 S, C;\n    myMatrix32 A;\n    myMatrix22 B;\n\n    mySymmMatrix O, P;\n    myDiagMatrix D, E;\n    
myTridiagMatrix T, TT;\n    myLowerMatrix L, LL;\n    myUpperMatrix U, UU;\n\n    intVector index;\n\n\n    Test() {\n      x = -2;\n\n      O.resize(3);\n      //      Q.resize(5);\n      index.resize(2);\n      v(0) = 2; v(1) = 3; v(2) = 5;\n      w(0) = 7; w(1) = 11; w(2) = 13;\n      M(0,0) = 2; M(0,1) = 3; M(0,2) = 5;\n      M(1,0) = 7; M(1,1) = 11; M(1,2) = 13;\n      N(0,0) = 17; N(0,1) = 19; N(0,2) = 23;\n      N(1,0) = 29; N(1,1) = 31; N(1,2) = 37;\n      S(0,0) = 2; S(0,1) = 3; S(0,2) = 5;\n      S(1,0) = 7; S(1,1) = 11; S(1,2) = 13;\n      S(2,0) = 17; S(2,1) = 19; S(2,2) = 23;\n\n      K << 57, 59;\n      z << 37, 47;\n\n      A << 21,22,23,24,25,26;\n      B << 31,32,33,34;\n\n      //      O = -M.T();\n\n\n      O(0,0) = 7;\n      O(1,0) = 2; O(1,1) = 11;\n      O(2,0) = 3; O(2,1) = 5; O(2,2) = 13;\n      /*\n\n      P = 14-O;\n\n      Q.diag_vector(-2) = 1;\n      Q.diag_vector(-1) = 2;\n      Q.diag_vector(0)  = 3;\n      Q.diag_vector(1)  = 4;\n      */\n\n      C = 0;\n      D = S;\n      T = S;\n      L = S;\n      U = S;\n      index << 1, 0;\n    }\n  };\n\n  stack.new_recording();\n\n  Test test;\n\n  bool should_fail=false;\n  int anomalous_results=0;\n\n#ifdef ALL_ACTIVE\n  std::cout << \"Testing ACTIVE arrays\\n\";\n#else\n  std::cout << \"Testing INACTIVE arrays\\n\";\n#endif\n\n\n  HEADING(\"BASIC EXPRESSIONS\");\n  EVAL2(\"Vector assignment to vector\", myVector3, v, myVector3, w, v = w);\n  EVAL2(\"Vector assignment to expression\", myVector3, v, myVector3, w, v = log(w) + 1.0);\n\n  EVAL(\"Matrix *= operator\", myMatrix23, M, M *= 0.5);\n  EVAL2(\"Matrix = scalar\", myMatrix23, M, myReal, x, M = x);\n\n  EVAL2(\"Matrix = scalar expression\", myMatrix23, M, myReal, x, M = (10.0*x));\n  HEADING(\"BASIC FUNCTIONS\");\n  EVAL2(\"max\", myVector3, v, myVector3, w, v = max(v,w/3.0));\n  EVAL2(\"min\", myVector3, v, myVector3, w, v = min(v,w/3.0));\n\n  HEADING(\"ARRAY SLICING\");\n  EVAL2(\"Array indexing rvalue\", myReal, x, myMatrix23, M, 
x = M(1,end-1));\n\n  should_fail=true;\n  EVAL2(\"Array indexing rvalue out of range (SHOULD FAIL)\", myReal, x, myMatrix23, M, x = M(1,3));\n  should_fail=false;\n\n  EVAL(\"Array indexing lvalue\", myMatrix23, M, M(1,end-1) *= -1.0);\n\n  EVAL2(\"contiguous subarray rvalue\", myVector3, v, myMatrix23, M, v = M(end,__));\n  EVAL(\"contiguous subarray lvalue\", myMatrix23, M, M(end-1,__) /= 2.0);\n  EVAL2(\"contiguous subarray rvalue using range\", myVector2, z, myMatrix23, M, z = 2.0 * M(1,range(1,2)));\n  EVAL2(\"contiguous subarray lvalue using range\", myMatrix23, M, myVector3, v, M(end-1,range(0,1)) = log(v(range(1,2))));\n  EVAL2(\"contiguous subarray rvalue using subset\", myMatrix12, K, myMatrix23, N, K = 2.0 * N.subset(1,1,1,2));\n  EVAL(\"contiguous subarray lvalue using subset\", myVector3, v, v.subset(end-1,end) *= 10.0);\n\n  EVAL2(\"regular subarray rvalue\", myVector3, v, myVector3, w, v = w(stride(end,0,-1)));\n  EVAL2(\"regular subarray lvalue\", myMatrix23, M, myVector3, w, M(0,stride(0,end,2)) *= w(stride(end,0,-2)));\n  EVAL(\"irregular subarray rvalue\", myMatrix23, M, M(stride(1,0,-1),find(M(0,__)>4)) = 0);\n  EVAL(\"slice leading dimension\", myMatrix23, M, M[end] = 0);\n  EVAL(\"slice two dimensions\", myMatrix23, M, M[end][0] = 0);\n  EVAL2(\"diag_vector member function as rvalue\", myVector2, z, myMatrix33, S, z = diag_vector(S,1));\n  EVAL2(\"diag_vector member function as lvalue\", myMatrix33, S, myVector3, v, S.diag_vector() += v);\n  EVAL2(\"diag_matrix member function\", myMatrix33, S, myVector3, v, S = v.diag_matrix());\n  EVAL2(\"diag_matrix external function\", myMatrix33, S, myVector3, v, S = diag_matrix(v));\n  EVAL2(\"transpose as rvalue via T member function\", myMatrix32, A, myMatrix23, M, A = 2 * M.T());\n  EVAL2(\"transpose as rvalue via permute member function\", myMatrix32, A, myMatrix23, M, A = 2 * M.permute(1,0));\n  //  EVAL3(\"2D arbitrary index as rvalue\", myMatrix22, B, myMatrix23, N, intVector, index, B = 
const_cast<const myMatrix23&>(N)(index,index));\n  EVAL3(\"2D arbitrary index as rvalue\", myMatrix22, B, myMatrix23, N, intVector, index, B = N(index,index));\n  EVAL3(\"2D arbitrary index as lvalue\", myMatrix23, M, myMatrix23, N, intVector, index, M(index,index) = N(__,range(1,2)));\n  EVAL2(\"2D arbitrary index as lvalue with assign-multiply operator\", myMatrix23, M, intVector, index, M(index,index) *= 10.0);\n  EVAL2(\"2D arbitrary index as lvalue with aliased right-hand-side\", myMatrix23, M, intVector, index, M(index,index) += M(__,range(1,2)));\n\n  HEADING(\"REDUCTION OPERATIONS\"); \n  EVAL2(\"full reduction\", myReal, x, myMatrix23, M, x = sum(M));\n  EVAL2(\"1-dimension reduction\", myVector3, v, myMatrix23, M, v = 0.5 * mean(M,0));\n  EVAL2(\"1-dimension reduction\", myVector2, z, myMatrix23, M, z = norm2(M,1));\n  EVAL2(\"maxval\", myVector2, z, myMatrix23, M, z = maxval(M,1));\n  EVAL2(\"minval\", myVector2, z, myMatrix23, M, z = minval(M,1));\n  EVAL2(\"dot product\", myReal, x, myVector3, w, x = dot_product(w,w(stride(end,0,-1))));\n  //  EVAL2(\"1D interpolation\", myVector3, v, myVector3, w, (v = interp<double,double,true,double>(value(v), w, value(w)/3.0) ));\n  EVAL2(\"1D interpolation\", myVector3, v, myVector3, w, v = interp(value(v), w, value(w)/2.0));\n  EVAL2(\"1D clamped interpolation\", myVector3, v, myVector3, w, v = interp(value(v), w, value(w)/2.0, ADEPT_EXTRAPOLATE_CLAMP));\n#ifndef ALL_ACTIVE\n  EVAL2(\"1D interpolation of matrix\", myMatrix23, M, myVector3, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0));\n  EVAL2(\"1D clamped interpolation of matrix\", myMatrix23, M, myVector3, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0, ADEPT_EXTRAPOLATE_CLAMP));\n#endif\n  HEADING(\"CONDITIONAL OPERATIONS\");\n  EVAL2(\"where construct, scalar right-hand-side\", myMatrix23, M, myMatrix23, N, M.where(N > 20) = 0);\n  EVAL2(\"where construct, expression right-hand-side\", myMatrix23, M, myMatrix23, N, M.where(N > 20) = -N);\n  
EVAL2(\"where construct, scalar either-or right-hand-side\", myMatrix23, M, myMatrix23, N, M.where(N > 20) = either_or(0,1));\n  EVAL2(\"where construct, expression either-or right-hand-side\", myMatrix23, M, myMatrix23, N, M.where(N > 20) = either_or(-N,N));\n  EVAL(\"find construct, scalar right-hand-side\", myVector3, v, v(find(v > 3.5)) = 0);\n  EVAL(\"find construct, expression right-hand-side\", myVector3, v, v(find(v > 3.5)) = -v(range(end,end)));\n  EVAL(\"find construct, multiply-assign right-hand-side\", myVector3, v, v(find(v != 5.0)) *= 10.0);\n\n  HEADING(\"SPECIAL SQUARE MATRICES\");\n  EVAL2(\"SymmMatrix assign from fixed matrix\", mySymmMatrix, O, myMatrix33, S, O = S);\n  EVAL2(\"DiagMatrix assign from dense matrix\", myDiagMatrix, D, myMatrix33, S, D = S);\n  EVAL2(\"TridiagMatrix assign from dense matrix\", myTridiagMatrix, T, myMatrix33, S, T = S);\n  EVAL2(\"LowerMatrix assign from dense matrix\", myLowerMatrix, L, myMatrix33, S, L = S);\n  EVAL2(\"UpperMatrix assign from dense matrix\", myUpperMatrix, U, myMatrix33, S, U = S);\n  EVAL2(\"SymmMatrix as rvalue\", myMatrix33, S, mySymmMatrix, O, S = O);\n  EVAL2(\"DiagMatrix as rvalue\", myMatrix33, S, myDiagMatrix, D, S = D);\n  EVAL2(\"TridiagMatrix as rvalue\", myMatrix33, S, myTridiagMatrix, T, S = T);\n  EVAL2(\"LowerMatrix as rvalue\", myMatrix33, S, myLowerMatrix, L, S = L);\n  EVAL2(\"UpperMatrix as rvalue\", myMatrix33, S, myUpperMatrix, U, S = U);\n\n  EVAL2(\"Array submatrix_on_diagonal member function\", myMatrix22, B, myMatrix33, S, B = S.submatrix_on_diagonal(1,2));\n  EVAL(\"Array submatrix_on_diagonal member function as lvalue\", myMatrix33, S, S.submatrix_on_diagonal(0,1) = 0);\n\n  should_fail = true;\n  EVAL2(\"Array submatrix_on_diagonal member function to non-square matrix\", myMatrix22, B, myMatrix33, N, B = N.submatrix_on_diagonal(1,2));\n  should_fail = false;\n\n#ifndef MARVEL_STYLE\n  if (adept::have_matrix_multiplication()) {\n    HEADING(\"MATRIX MULTIPLICATION\");\n   
 EVAL2(\"Matrix-Matrix multiplication\", myMatrix33, S, myMatrix23, M, S = M.T() ** M);\n    EVAL2(\"Matrix-Matrix multiplication with matmul\", myMatrix33, S, myMatrix23, M, S = matmul(M.T(), M));\n\n    should_fail = true;\n    EVAL2(\"Matrix-Matrix multiplication with inner dimension mismatch\", myMatrix33, S, myMatrix23, M, S = M ** M);\n    should_fail = false;\n    \n    // TESTING!\n    EVAL2(\"Matrix-Matrix-Vector multiplication\", myVector3, v, myMatrix33, S, v = S ** S ** v);\n    \n    EVAL2(\"Matrix-Matrix-Vector multiplication\", myVector3, v, myMatrix33, S, v = S ** log(S) ** S(0,__));\n    EVAL2(\"Vector-Matrix multiplication\", myVector3, v, myMatrix33, S, v = v ** S);\n    EVAL2(\"Vector-Matrix multiplication with matmul\", myVector3, v, myMatrix33, S, v = matmul(v, S));\n    EVAL2(\"SymmMatrix-Vector multiplication\", myVector3, v, mySymmMatrix, O, v = O ** v);\n    EVAL2(\"SymmMatrix-Matrix multiplication\", myMatrix33, S, mySymmMatrix, O, S = O ** S);\n    EVAL2(\"Vector-SymmMatrix multiplication\", myVector3, v, mySymmMatrix, O, v = v ** O);\n    EVAL2(\"Matrix-SymmMatrix multiplication\", myMatrix23, M, mySymmMatrix, O, M = M ** O);\n    EVAL2(\"DiagMatrix-Vector multiplication\", myVector3, v, myDiagMatrix, D, v = D ** v);\n    EVAL2(\"TridiagMatrix-Vector multiplication\", myVector3, v, myTridiagMatrix, T, v = T ** v);\n    EVAL2(\"TridiagMatrix-Matrix multiplication\", myMatrix33, S, myTridiagMatrix, T, S = T ** S);\n    EVAL2(\"Vector-TridiagMatrix multiplication\", myVector3, v, myTridiagMatrix, T, v = v ** T);\n    EVAL2(\"Matrix-TridiagMatrix multiplication\", myMatrix23, M, myTridiagMatrix, T, M = M ** T);\n  }\n  else {\n    std::cout << \"NO MATRIX MULTIPLICATION TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\\n\";\n  }\n\n#ifndef ALL_ACTIVE\n  if (adept::have_linear_algebra()) {\n    HEADING(\"LINEAR ALGEBRA\");\n    EVAL2(\"Solving general linear equations Ax=b\", myVector3, v, myMatrix33, S, v = solve(S,v));\n    \n    
EVAL2(\"Solving general linear equations AX=B\", myMatrix23, M, myMatrix33, S, M.T() = solve(S,M.T()));\n    EVAL2(\"Solving linear equations Ax=b with symmetric A\", myVector3, v, mySymmMatrix, O, v = solve(O,v));\n    EVAL2(\"Solving linear equations AX=B with symmetric A\", myMatrix23, M, mySymmMatrix, O, M.T() = solve(O,M.T()));\n    EVAL2(\"Invert general matrix\", myMatrix33, C, myMatrix33, S, C = inv(S));\n  }\n  else {\n    std::cout << \"NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\\n\";\n  }    \n#else\n    std::cout << \"NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ACTIVE ARRAYS NOT YET SUPPORTED\\n\";\n#endif\n#else\n    std::cout << \"NO MATRIX TESTS PERFORMED BECAUSE USING MARVEL-STYLE ACTIVE ARRAYS\\n\";\n#endif\n\n\n  HEADING(\"FILLING ARRAYS\");\n  EVAL(\"Fill vector with \\\"<<\\\"\", myVector3, v, (v << 0.1, 0.2));\n\n  should_fail = true;\n  EVAL(\"Overfill vector with \\\"<<\\\"\", myVector3, v, (v << 0.1, 0.2, 0.3, 0.4));\n  should_fail = false;\n\n  EVAL(\"Underfill matrix with \\\"<<\\\"\", myMatrix23, M, (M << 0.1, 0.2, 0.3, 0.4, 0.5));\n  EVAL(\"Fill matrix with \\\"<<\\\"\", myMatrix23, M, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6));\n\n  should_fail = true;\n  EVAL(\"Overfill matrix with \\\"<<\\\"\", myMatrix23, M, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0));\n  should_fail = false;\n\n  EVAL(\"Fill vector with vectors using \\\"<<\\\"\", myVector3, v, v << v(range(1,2)) << 0.1);\n  EVAL2(\"Fill matrix with vector using \\\"<<\\\"\", myMatrix23, M, myVector3, v, M << 0.1 << 0.2 << 0.3 << v);\n  EVAL2(\"Fill matrix with vector using \\\"<<\\\"\", myMatrix33, S, myVector3, v, S << v << v << v);\n  EVAL(\"Assign array using range\", myVector3, v, v = range(3,5));\n\n#ifdef ADEPT_BOUNDS_CHECKING\n  HEADING(\"BOUNDS CHECKING\");\n  should_fail = true;\n  EVAL(\"Access vector out of bounds\", myVector3, v, v(0) = v(4));\n  EVAL(\"Access vector out of bounds\", myVector3, v, v(0) = v(end-4));\n  EVAL(\"Access 
matrix out of bounds\", myMatrix23, M, M(0,0) = M(0,-1));\n  EVAL(\"Access matrix out of bounds\", myMatrix23, M, M(0,0) = M(end+1,1));\n  should_fail = false;\n#endif\n\n\n  std::cout << \"====================================================================\\n\";\n  if (anomalous_results > 0) {\n    std::cout << \"*** In terms of run-time errors, there were \" << anomalous_results << \" incorrect results\\n\";\n    return 1;\n  }\n  else {\n    std::cout << \"In terms of run-time errors, all tests were passed\\n\";\n    return 0;\n  }\n}\n"
  },
  {
    "path": "test/test_gsl_interface.cpp",
    "content": "/* test_gsl_interface.cpp - \"main\" function for Test 4\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// This program minimizes the N-dimensional Rosenbrock banana\n// function, with the number of dimensions optionally provided on the\n// command line\n\n#include <iostream>\n#include <vector>\n#include <cstdlib>\n\n#include \"state.h\"\n\nint\nmain(int argc, char** argv)\n{\n  std::cout << \"Testing Adept-GSL interface using N-dimensional Rosenbrock function\\n\";\n  std::cout << \"Usage: \" << argv[0] << \" [number_of_dimensions]\\n\";\n\n  // Read number of dimensions from the command line (default 2)\n  int nx = 2;\n  if (argc > 1) {\n    nx = std::atoi(argv[1]);\n  }\n   \n  if (nx < 2) {\n    std::cout << \"Error: must have 2 or more dimensions, but \"\n\t      << nx << \" requested\\n\";\n    return 1;\n  }\n\n  // Create minimization environment (see state.h) and then minimize\n  // the function; note that initial values are set on construction.\n  State state(nx);\n  state.minimize();\n\n  // Print out the result\n  std::vector<double> x;\n  state.x(x);\n  std::cout << \"Final state: x = [\";\n  for (int i = 0; i < nx; i++) {\n    std::cout << \" \" << x[i];\n  }\n  std::cout << \"]\\n\";\n  \n  return 0;\n}\n"
  },
  {
    "path": "test/test_interp.cpp",
    "content": "/* test_interp.cpp\n\n  Copyright (C) 2024- European Centre for Medium-Range Weather Forecasts\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n  This file tests interpolation operations\n*/\n\n#include <iostream>\n#include \"adept_arrays.h\"\n\nusing namespace adept;\n\n#define TEST_MULTI(FUNC)\t\t\t\t\\\n  {\t\t\t\t\t\t\\\n    std::cout << #FUNC << \" =\";\t\t\\\n    std::cout << FUNC << \"\\n\";\t\t\t\\\n  }\n\n#define TEST(FUNC)\t\t\t\t\t\\\n  {\t\t\t\t\t\t\t\\\n    std::cout << FUNC << \"     \\t = \" << #FUNC << \"\\n\";\t\\\n  }\n\nint\nmain(int argc, const char** argv)\n{\n  set_array_print_style(PRINT_STYLE_MATLAB);\n  {\n    std::cout << \"*** 1D interpolation ***\\n\\n\";\n    Vector x = {1.0, 4.0, 9.0};\n    Vector m = {2.0, 3.0, 5.0};\n    Vector xi = {4.0, 4.8, 3.0, 0.5, 10.0};\n    std::cout << \"Coordinate vector and interpolation vector:\\n\";\n    std::cout << \"x  = \" << x << \"\\n\";\n    std::cout << \"m  = \" << m << \"\\n\";\n    std::cout << \"xi = \" << xi << \"\\n\";\n    std::cout << \"...which are:\\n\"\n\t      << \"  (1) at a point in the interpolation vector,\\n\"\n\t      << \"  (2) between points in the interpolation vector (closer to left),\\n\"\n      \t      << \"  (3) between points in the interpolation vector (closer to right),\\n\"\n\t      << \"  (4) off the left of the interpolation vector, and\\n\"\n\t      << \"  (5) off the right of the interpolation vector.\\n\\n\";\n    TEST(interp(x,m,xi));\n    TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_LINEAR));\n    TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_CLAMP));\n    TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_CONSTANT));\n    TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_CONSTANT,-10.0));\n    TEST(interp(x(stride(end,0,-1)),m(stride(end,0,-1)),xi,ADEPT_EXTRAPOLATE_LINEAR));\n    
TEST(interp(x+0.0,m+0.0,xi+0.0,ADEPT_EXTRAPOLATE_LINEAR));\n    TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST));\n    TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP));\n    TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT));\n    TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT,-10.0));\n    TEST(interp(x(stride(end,0,-1)),m(stride(end,0,-1)),xi,ADEPT_INTERPOLATE_NEAREST));\n\n    Matrix M = spread<1>(m,2);\n    std::cout << \"\\n*** Multiple 1D linear interpolation ***\\n\";\n    std::cout << \"M = \" << M << \"\\n\";\n    TEST_MULTI(interp(x,M,xi));\n    TEST_MULTI(interp(x,M,xi,ADEPT_INTERPOLATE_NEAREST));\n  }\n\n  \n  {\n    std::cout << \"\\n*** 2D linear interpolation ***\\n\\n\";\n    int nx = 4;\n    int ny = 3;\n\n    Vector y = pow(linspace(1.0,ny,ny),2.0);\n    Vector x = linspace(1.0,nx,nx);\n    Matrix M = {{2.0,3.0,5.0,7.0},\n\t\t{11.0,13.0,17.0,19.0},\n\t\t{23.0,29.0,31.0,37.0}};//outer_product(y,x);\n    \n    Vector yi = {4.0, 2.0, 6.5, 0.5};\n    Vector xi = {2.0, 3.8, 0.5, 5.0};\n\n    std::cout << \"Coordinate vectors and interpolation matrix:\\n\";\n    std::cout << \"y = \" << y << \"\\n\";\n    std::cout << \"x = \" << x << \"\\n\";\n    std::cout << \"M = \" << M << \"\\n\";\n    std::cout << \"\\nTo be interpolated to the following points:\\n\";\n    std::cout << \"yi = \" << yi << \"\\n\";\n    std::cout << \"xi = \" << xi << \"\\n\";\n    std::cout << \"...which are:\\n\"\n\t      << \"  (1) at a point in the interpolation matrix,\\n\"\n\t      << \"  (2) between points in the interpolation matrix,\\n\"\n\t      << \"  (3) off the left of the matrix, and\\n\"\n\t      << \"  (4) off the top-right of the matrix.\\n\\n\";\n  \n    TEST(interp2d(y,x,M,yi,xi));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_LINEAR));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_CLAMP));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT));\n    
TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT,-10.0));\n    TEST(interp2d(y(stride(end,0,-1)),x,M(stride(end,0,-1),__),yi,xi));\n    TEST(interp2d(y+0.0,x+0.0,M+0.0,yi+0.0,xi+0.0));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT));\n    TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT,-10.0));\n    TEST(interp2d(y(stride(end,0,-1)),x,M(stride(end,0,-1),__),yi,xi,ADEPT_INTERPOLATE_NEAREST));\n\n    Array3D A = spread<2>(M,2);\n    std::cout << \"\\n*** Multiple 2D linear interpolation ***\\n\";\n    std::cout << \"A = \" << A << \"\\n\";\n    TEST_MULTI(interp2d(y,x,A,yi,xi));\n  }\n\n  {\n    std::cout << \"\\n*** 3D interpolation ***\\n\\n\";\n    int nx = 4;\n    int ny = 3;\n    int nz = 2;\n\n    Vector z = linspace(1.0,nz,nz);\n    Vector y = linspace(1.0,ny,ny);\n    Vector x = pow(linspace(1.0,nx,nx),2.0);\n    Array3D A(nz,ny,nx);\n    A(0,__,__) = outer_product(y,x);\n    A(1,__,__) = outer_product(y,x)+1.0;\n\n    Vector zi = {2.0, 1.2, 1.5,  5.0};\n    Vector yi = {2.0, 2.6, 0.5,  5.0};\n    Vector xi = {4.0, 10.0,20.0, 0.5};\n\n    std::cout << \"Coordinate vectors and interpolation array:\\n\";\n    std::cout << \"z = \" << z << \"\\n\";\n    std::cout << \"y = \" << y << \"\\n\";\n    std::cout << \"x = \" << x << \"\\n\";\n    std::cout << \"A = \" << A << \"\\n\";\n    std::cout << \"\\nTo be interpolated to the following points:\\n\";\n    std::cout << \"zi = \" << zi << \"\\n\";\n    std::cout << \"yi = \" << yi << \"\\n\";\n    std::cout << \"xi = \" << xi << \"\\n\";\n    std::cout << \"...which are:\\n\"\n\t      << \"  (1) at a point in the interpolation array,\\n\"\n\t      << \"  (2) between points in the interpolation array,\\n\"\n\t      << \"  (3) off the array in two dimension but not the third, and\\n\"\n\t      << \"  (4) 
off all dimensions of the array.\\n\\n\";\n  \n    TEST(interp3d(z,y,x,A,zi,yi,xi));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_LINEAR));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_CLAMP));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT,-10.0));\n    TEST(interp3d(z,y(stride(end,0,-1)),x,A(__,stride(end,0,-1),__),zi,yi,xi,ADEPT_EXTRAPOLATE_LINEAR));\n    TEST(interp3d(z+0.0,y+0.0,x+0.0,A+0.0,zi+0.0,yi+0.0,xi+0.0,ADEPT_EXTRAPOLATE_LINEAR));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT));\n    TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT,-10.0));\n    TEST(interp3d(z,y(stride(end,0,-1)),x,A(__,stride(end,0,-1),__),zi,yi,xi,ADEPT_INTERPOLATE_NEAREST));\n\n  }\n  \n  return 0;\n}\n"
  },
  {
    "path": "test/test_minimizer.cpp",
    "content": "/* test_minimizer.cpp - Test Adept minimizer with N-dimensional Rosenbrock function\n\n  Copyright (C) 2020-2022 ECMWF\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n//#include <string> // for std::stoi in C++11\n#include <cstdio>  // for std::sscanf in C++98\n#include <iostream>\n#include <adept_optimize.h>\n\n// Set this to a large or small number to test if the minimization\n// algorithms are immune to the absolute scaling of the cost function\n#define COST_SCALING 1.0\n\nusing namespace adept;\n\nclass RosenbrockN : public Optimizable {\npublic:\n\n  RosenbrockN() : ls_iteration_(0), exact_hessian_(false) {}\n\n  int ls_iteration_;   // Line search iteration\n\n  // Do we use the exact Hessian derivate analytically, or the\n  // approximate one from the Jacobian matrix and the Gauss-Newton\n  // formula?\n  bool exact_hessian_;\n\n  // N-dimensional Rosenbrock function can be expressed as the sum of\n  // the squared elements of vector y(x) defined as follows.  This\n  // form facilitates the calculation of the approximate Hessian from\n  // the Jacobian dy/dx.  
It is templated so that can be called either\n  // with a passive \"Vector\" or active \"aVector\" argument.\n  template <bool IsActive>\n  Array<1,Real,IsActive> calc_y(const Array<1,Real,IsActive>& x) {\n    int nx = x.size();\n    Array<1,Real,IsActive> y((nx-1)*2);\n    for (int ix = 0; ix < nx-1; ++ix) {\n      y(ix*2)   = 10.0 * (x(ix+1)-x(ix)*x(ix));\n      y(ix*2+1) = 1.0 - x(ix);\n    }\n    y *= sqrt(2.0 * COST_SCALING);\n    return y;\n  }\n\n  void calc_exact_hessian(const adept::Vector& x, SymmMatrix& hessian) {\n    hessian = 0.0;\n    int nx = hessian.dimension();\n    for (int ix = 0; ix < nx-1; ++ix) {\n      hessian(ix,ix) = 1200.0*x(ix)*x(ix) - 400.0*x(ix+1) + 2.0;\n      hessian(ix,ix+1) = -400.0*x(ix);\n    }\n    for (int ix = 1; ix < nx; ++ix) {\n      hessian(ix,ix) = hessian(ix,ix) + 200.0;\n    }\n  }\n\n  virtual void report_progress(int niter, const adept::Vector& x,\n\t\t\t       Real cost, Real gnorm) {\n    ls_iteration_ = 0;\n    std::cout << \"Iteration \" << niter\n\t      << \": cost=\" << cost << \", gnorm=\" << gnorm << \"\\n\";\n  }\n\n  void state_to_stderr(const adept::Vector& x, Real cost) {\n    \n    // For plotting progress, direct standard error to a text file\n    std::cerr << ls_iteration_ << \" \";\n    for (int ix = 0; ix < x.size(); ++ix) {\n      std::cerr << x(ix) << \" \";\n    }\n    std::cerr << cost << \"\\n\";\n    ++ls_iteration_;\n  }\n\n  void final_state_to_stderr(const adept::Vector& x, Real cost) {\n    ls_iteration_ = -1;\n    state_to_stderr(x, cost);\n  }\n\n  virtual bool provides_derivative(int order) {\n    if (order >= 0 && order <= 2) {\n      return true;\n    }\n    else {\n      return false;\n    }\n  }\n\n  virtual Real calc_cost_function(const Vector& x) {\n    //std::cout << \"  test x: \" << x << \"\\n\";\n    Vector y = calc_y(x);\n    Real cost = 0.5*sum(y*y);\n    state_to_stderr(x,cost);\n    return cost;\n  }\n\n  virtual Real calc_cost_function_gradient(const Vector& x,\n\t\t\t\t\t 
  Vector gradient) {\n    Stack stack;\n    aVector xactive = x;\n    stack.new_recording();\n    aVector y = calc_y(xactive);\n    aReal cost = 0.5*sum(y*y);\n    cost.set_gradient(1.0);\n    stack.reverse();\n    gradient = xactive.get_gradient();\n    state_to_stderr(x,value(cost));\n    return value(cost);\n  }\n\n  virtual Real calc_cost_function_gradient_hessian(const Vector& x,\n\t\t\t\t\t\t   Vector gradient,\n\t\t\t\t\t\t   SymmMatrix& hessian) {\n    Stack stack;\n    aVector xactive = x;\n    stack.new_recording();\n    aVector y = calc_y(xactive);\n    aReal cost = 0.5*sum(y*y);\n    stack.independent(xactive);\n    stack.dependent(y);\n    Matrix jac = stack.jacobian();\n    if (exact_hessian_) {\n      calc_exact_hessian(x, hessian);\n    }\n    else {\n      hessian  = jac.T() ** jac;\n    }\n    gradient = jac.T() ** value(y);\n    state_to_stderr(x,value(cost));\n    return value(cost);\n  }\n\n};\n\nint\nmain(int argc, const char* argv[])\n{\n\n  if (!adept::have_linear_algebra()) {\n    std::cout << \"Adept compiled without linear-algebra support: minimizer not available\\n\";\n    return 0;\n  }\n\n  RosenbrockN rosenbrock;\n  Minimizer minimizer(MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT);\n  // The convergence criterion should be changed in accordance with\n  // the cost function scaling\n  minimizer.set_converged_gradient_norm(0.1*COST_SCALING);\n  int nx = 2;\n  if (argc > 1) {\n    // nx = std::stoi(argv[1]);\n    std::sscanf(argv[1], \"%d\", &nx);\n    if (argc > 2) {\n      const char* algo_ptr = argv[2];\n      std::string algo(argv[2]);\n      // If algorithm name is prefixed by \"Newton-\" then use the exact\n      // Hessian matrix (analytically derived for this specific\n      // function) rather than the Gauss-Newton approximation from the\n      // Jacobian matrix\n      if (algo.find(\"Newton-\") == 0) {\n\talgo_ptr += 7;\n\trosenbrock.exact_hessian_ = true;\n      }\n      minimizer.set_algorithm(algo_ptr);\n      if (argc > 3) 
{\n\tint max_it;\n\t// max_it = std::stof(argv[3]);\n\tstd::sscanf(argv[3], \"%d\", &max_it);\n\tminimizer.set_max_iterations(max_it);\n\tif (argc > 4) {\n\t  double converged_grad_norm;\n\t  //converged_grad_norm = std::stof(argv[4]);\n\t  std::sscanf(argv[4], \"%lf\", &converged_grad_norm);\n\t  minimizer.set_converged_gradient_norm(converged_grad_norm);\n\t}\n      }\n    }\n  }\n  else {\n    std::cout << \"Usage: \" << argv[0] << \" [nx] [Levenberg|Levenberg-Marquardt|Newton-Levenberg|Newton-Levenberg-Marquardt|L-BFGS|Conjugate-Gradient] [max_iterations] [converged_gradient_norm]\\n\";\n  }\n\n  minimizer.set_levenberg_damping_start(0.25);\n  //minimizer.set_max_step_size(1.0);\n  //  minimizer.set_levenberg_damping_multiplier(3.0, 5.0);\n  minimizer.ensure_updated_state(2);\n\n  std::cout << \"Minimizing \" << nx << \"-dimensional Rosenbrock function\\n\";\n  std::cout << \"Algorithm: \" << minimizer.algorithm_name() << \"\\n\";\n  std::cout << \"Use exact Hessian: \" << rosenbrock.exact_hessian_ << \"\\n\";\n  std::cout << \"Maximum iterations: \" << minimizer.max_iterations() << \"\\n\";\n  std::cout << \"Converged gradient norm: \" << minimizer.converged_gradient_norm() << \"\\n\";\n\n  // Initial state vector\n  Vector x(nx);\n  // Standard start\n  x = -3.0;\n  // Trickier start (other end of the banana)\n  //x = -3.0; x(1) = 3.0;\n  // Near other minima in higher dimensions\n  //x = 1.0; x(0) = -1.0;\n\n  bool is_bounded = false;\n  MinimizerStatus status;\n\n  if (is_bounded) {\n    //    x = -3.0; x(1) = 3.0;\n    x = -0.75; x(1) = 3.0;\n    Vector x_lower, x_upper;\n    adept::minimizer_initialize_bounds(nx, x_lower, x_upper);\n    // x_upper(1) = 2.0;   x_lower(1) = 0.2;\n    x_lower(0) = -1;\n    status = minimizer.minimize(rosenbrock, x, x_lower, x_upper);\n  }\n  else {\n    status = minimizer.minimize(rosenbrock, x);\n  }\n  //rosenbrock.final_state_to_stderr(x, minimizer.cost_function());\n\n  std::cout << \"Status: \" << 
minimizer_status_string(status) << \"\\n\";\n  std::cout << \"Solution: x=\" << x << \"\\n\";\n  std::cout << \"Number of samples: \" << minimizer.n_samples() << \"\\n\";\n\n  return static_cast<int>(status);\n}\n"
  },
  {
    "path": "test/test_misc.cpp",
    "content": "/* test_misc.cpp\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include \"adept.h\"\n#include \"algorithm.h\"\n\n// A straight implementation of the trivial example in Hogan (2014)\n\ndouble algorithm_ad(const double x_val[2], // Input values\n                    double* Y_ad,          // Input-output adjoint\n                    double x_ad[2]) {      // Output adjoint\n  using namespace adept;                   // Import Stack and adouble from adept\n  Stack stack;                             // Where differential information is stored\n  adouble x[2] = {x_val[0], x_val[1]};     // Initialize adouble inputs\n  stack.new_recording();                   // Start recording derivatives\n  adouble Y = algorithm(x);                // Version overloaded for adouble args\n  Y.set_gradient(*Y_ad);                   // Load the input-output adjoint\n  stack.reverse();                         // Run the adjoint algorithm\n  x_ad[0] = x[0].get_gradient();           // Extract the output adjoint for x[0]\n  x_ad[1] = x[1].get_gradient();           //   ...and x[1]\n  *Y_ad   = Y.get_gradient();              // Input-output adjoint has changed too\n  return Y.value();                        // Return result of simple computation\n}   \n\nint main()\n{\n  double x[2] = {2.0, 3.0};\n  double y_ad = 1.0;\n  double x_ad[2];\n  double y = algorithm_ad(x, &y_ad, x_ad);\n  std::cout << \"x[0] = \" << x[0] << \"\\n\"\n\t    << \"x[1] = \" << x[1] << \"\\n\"\n\t    << \"y    = \" << y    << \"\\n\"\n\t    << \"y_ad = \" << y_ad << \"\\n\"\n\t    << \"x_ad[0]=\" << x_ad[0] << \"\\n\"\n\t    << \"x_ad[1]=\" << x_ad[1] << \"\\n\";\n  return 0;\n}\n"
  },
  {
    "path": "test/test_no_lib.cpp",
    "content": "/* test_no_lib.cpp\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n// This source file contains everything that would normally be\n// compiled into a static or dynamic library; this means that the\n// executable can be built without needing to link to the\n// library. This is useful for non-Unix platforms where the configure\n// script cannot be run. Note that only one source file should\n// #include \"adept_source.h\"; all the others should #include \"adept.h\"\n// as normal.\n#include \"adept_source.h\"\n\n#include \"algorithm.h\"\n\n// A straight implementation of the trivial example in Hogan (2014)\n\ndouble algorithm_ad(const double x_val[2], // Input values\n                    double* Y_ad,          // Input-output adjoint\n                    double x_ad[2]) {      // Output adjoint\n  using namespace adept;                   // Import Stack and adouble from adept\n  Stack stack;                             // Where differential information is stored\n  adouble x[2] = {x_val[0], x_val[1]};     // Initialize adouble inputs\n  stack.new_recording();                   // Start recording derivatives\n  adouble Y = algorithm(x);                // Version overloaded for adouble args\n  Y.set_gradient(*Y_ad);                   // Load the input-output adjoint\n  stack.reverse();                         // Run the adjoint algorithm\n  x_ad[0] = x[0].get_gradient();           // Extract the output adjoint for x[0]\n  x_ad[1] = x[1].get_gradient();           //   ...and x[1]\n  *Y_ad   = Y.get_gradient();              // Input-output adjoint has changed too\n  return Y.value();                        // Return result of simple computation\n}   \n\nint main()\n{\n  double x[2] = {2.0, 3.0};\n  double 
y_ad = 1.0;\n  double x_ad[2];\n  double y = algorithm_ad(x, &y_ad, x_ad);\n  std::cout << \"x[0] = \" << x[0] << \"\\n\"\n\t    << \"x[1] = \" << x[1] << \"\\n\"\n\t    << \"y    = \" << y    << \"\\n\"\n\t    << \"y_ad = \" << y_ad << \"\\n\"\n\t    << \"x_ad[0]=\" << x_ad[0] << \"\\n\"\n\t    << \"x_ad[1]=\" << x_ad[1] << \"\\n\";\n  return 0;\n}\n"
  },
  {
    "path": "test/test_packet_operations.cpp",
    "content": "/* test_packet_operations.cpp\n\n  Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n  This file tests Adept's vectorization capabilities Adept vectors of\n  types float and double, and also Packet<float> and Packet<double>\n  that encapsulate the underlying intrinsic SIMD vector types.\n*/\n\n#include <iostream>\n#include \"adept_arrays.h\"\n\nusing namespace adept;\n\ntemplate <typename Type>\nArray<1,Type> p2v(internal::Packet<Type> p) {\n  Array<1,Type> v(internal::Packet<Type>::size);\n  p.put(v.data());\n  return v;\n}\n\ntemplate <typename Type>\nvoid test_packet_operations() {\n  static const int N = internal::Packet<Type>::size;\n  std::cout << \"\\nADEPT PACKET\\n\";\n  std::cout << \"Type: \" << sizeof(Type) << \"-byte floating point numbers\\n\";\n  std::cout << \"Packet size: \" << N << \"\\n\";\n  Array<1,Type> v(N), w(N);\n  v = range(1,N);\n  w = 2.0;\n  internal::Packet<Type> p(v.data());\n  internal::Packet<Type> q(w.data());\n  std::cout << \"p = \" << p2v(p) << \"\\n\";\n  std::cout << \"q = \" << p2v(q) << \"\\n\";\n  std::cout << \"p+q = \" << p2v(p+q) << \"\\n\";\n  std::cout << \"p-q = \" << p2v(p-q) << \"\\n\";\n  std::cout << \"p*q = \" << p2v(p*q) << \"\\n\";\n  std::cout << \"p/q = \" << p2v(p/q) << \"\\n\";\n  std::cout << \"sqrt(p) = \" << p2v(sqrt(p)) << \"\\n\";\n  std::cout << \"fmin(p,q) = \" << p2v(fmin(p,q)) << \"\\n\";\n  std::cout << \"fmax(p,q) = \" << p2v(fmax(p,q)) << \"\\n\";\n  std::cout << \"hsum(p) = \" << hsum(p) << \"\\n\";\n  std::cout << \"hprod(p) = \" << hprod(p) << \"\\n\";\n  std::cout << \"hmin(p) = \" << hmin(p) << \"\\n\";\n  std::cout << \"hmax(p) = \" << hmax(p) << \"\\n\";\n}\n  \n\ntemplate <typename Type>\nvoid 
test_vector_operations(int N) {\n  std::cout << \"\\nADEPT ARRAY\\n\";\n  std::cout << \"Type: \" << sizeof(Type) << \"-byte floating point numbers\\n\";\n  std::cout << \"Packet size: \" << internal::Packet<Type>::size << \"\\n\";\n  Array<1,Type> v(N), w(N);\n  v = range(1,N);\n  w = 2.0;\n  std::cout << \"v = \" << v << \"\\n\";\n  std::cout << \"w = \" << w << \"\\n\";\n  std::cout << \"v+w = \" << v+w << \"\\n\";\n  std::cout << \"v-w = \" << v-w << \"\\n\";\n  std::cout << \"v*w = \" << v*w << \"\\n\";\n  std::cout << \"v/w = \" << v/w << \"\\n\";\n  std::cout << \"sqrt(v) = \" << sqrt(v) << \"\\n\";\n  std::cout << \"fmin(v,w) = \" << fmin(v,w) << \"\\n\";\n  std::cout << \"fmax(v,w) = \" << fmax(v,w) << \"\\n\";\n  std::cout << \"sum(v) = \" << sum(v) << \"\\n\";\n  std::cout << \"product(v) = \" << product(v) << \"\\n\";\n  std::cout << \"minval(v) = \" << minval(v) << \"\\n\";\n  std::cout << \"maxval(v) = \" << maxval(v) << \"\\n\";\n}\n\ntemplate <typename Type>\nvoid test_unaligned_reduce(int N) {\n  std::cout << \"\\nUNALIGNED REDUCE\\n\";\n  std::cout << \"Type: \" << sizeof(Type) << \"-byte floating point numbers\\n\";\n  std::cout << \"Packet size: \" << internal::Packet<Type>::size << \"\\n\";\n  Array<1,Type> v(N);\n  v = range(1,N);\n  std::cout << \"v = \" << v << \"\\n\";\n  std::cout << \"sum(v(range(1,end-1))) = \" << sum(v(range(1,end-1))) << \"\\n\";\n}\n\ntemplate <typename Type>\nvoid test_unaligned_assign(int N) {\n  std::cout << \"\\nUNALIGNED ASSIGN\\n\";\n  std::cout << \"Type: \" << sizeof(Type) << \"-byte floating point numbers\\n\";\n  std::cout << \"Packet size: \" << internal::Packet<Type>::size << \"\\n\";\n  Array<1,Type> v(N), w(N), x(N);\n  v = range(1,N);\n  w = 2.0;\n  x = 0.0;\n  std::cout << \"v = \" << v << \"\\n\";\n  std::cout << \"w = \" << w << \"\\n\";\n  std::cout << \"x = \" << x << \"\\n\";\n  std::cout << \"x(range(1,end-1)) = v(range(1,end-1))+w(range(1,end-1)) ->\\n\";\n  x(range(1,end-1)) = 
v(range(1,end-1))+w(range(1,end-1));\n  std::cout << \"x = \" << x << \"\\n\";\n\n}\n\nint\nmain(int argc, const char** argv)\n{\n  // Vectorization is only carried out on arrays of length twice the\n  // packet length or longer\n  static const int N = 2*internal::Packet<float>::size;\n\n  test_packet_operations<float>();\n  test_packet_operations<double>();\n\n  Packet<double> d(2.0);\n  Packet<double> e = fastexp(d);\n  std::cout << \"e=\" << e << \"\\n\";\n  \n  test_vector_operations<float>(N);\n  test_vector_operations<double>(N);\n\n  test_unaligned_reduce<float>(2*N);\n  test_unaligned_reduce<double>(2*N);\n\n  test_unaligned_assign<float>(2*N);\n  test_unaligned_assign<double>(2*N);\n\n  return 0;\n}\n"
  },
  {
    "path": "test/test_radiances.cpp",
    "content": "/* test_radiances.cpp - \"main\" function for Test 3\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include \"adept.h\"\n#include \"simulate_radiances.h\"\n\nusing adept::Real;\nusing adept::aReal;\n\n// This function provides an Adept interface to the simulate_radiances\n// function\nvoid simulate_radiances_wrapper(int n,\n\t\t\t\tconst aReal& surface_temperature,\n\t\t\t\tconst aReal* temperature,\n\t\t\t\taReal radiance[2]) {\n  // Create inactive (Real) versions of the active (aReal) inputs\n  Real st = value(surface_temperature);\n  std::vector<Real> t(n);\n  for (int i = 0; i < n; ++i) t[i] = value(temperature[i]);\n  \n  // Declare variables to hold the inactive outputs and their Jacobians\n  Real r[2];\n  Real dr_dst[2];\n  std::vector<Real> dr_dt(2*n);\n  \n   // Call the function with the non-Adept interface\n  simulate_radiances(n, st, &t[0], &r[0], dr_dst, &dr_dt[0]);\n  \n  // Copy the results into the active variables, but use set_value in order\n  // not to write any equivalent derivative statement to the Adept stack\n  radiance[0].set_value(r[0]);\n  radiance[1].set_value(r[1]);\n  \n  // Loop over the two radiances and add the derivative statements to\n  // the Adept stack\n  for (int i = 0; i < 2; ++i) {\n    // Add the first term on the right-hand-side of Equation 1 in the text\n    radiance[i].add_derivative_dependence(surface_temperature, dr_dst[i]);\n    // Now append the second term on the right-hand-side of Equation\n    // 1. 
The third argument \"n\" of the following function says that\n    // there are n terms to be summed, and the fourth argument \"2\"\n    // says to take only every second element of the Jacobian dr_dt,\n    // since the derivatives with respect to the two radiances have\n    // been interlaced.  If the fourth argument is omitted then\n    // relevant Jacobian elements will be assumed to be contiguous in\n    // memory.\n    radiance[i].append_derivative_dependence(temperature, &dr_dt[i], n, 2);\n  }\n\n  for (int i = 0; i < 2; ++i) {\n    std::cout << \"Channel \" << i << \"\\n\";\n    std::cout << \"d[radiance]/d[surface_temperature] = \" << dr_dst[i] << \"\\n\";\n    std::cout << \"d[radiance]/d[temperature] =\";\n    for (int j = 0; j < n; ++j) {\n      std::cout << \" \" << dr_dt[i+j*2];\n    }\n    std::cout << \"\\n\\n\";\n  }\n\n}\n\n\nint\nmain(int argc, char** argv)\n{\n  // Temperature (K) at 1000-m intervals from the mid-latitude summer\n  // standard atmosphere\n  static const int N_POINTS = 25;\n  static const Real temperature_profile[N_POINTS+1]\n    = {294.0, 290.0, 285.0, 279.0, 273.0, 267.0, 261.0, 255.0,\n       248.0, 242.0, 235.0, 229.0, 222.0, 216.0, 216.0, 216.0,\n       216.0, 216.0, 216.0, 217.0, 218.0, 219.0, 220.0, 222.0,\n       223.0, 224.0};\n\n  // Start the Adept stack\n  adept::Stack s;\n  \n  // Copy the temperature profile information into active variables\n  aReal surface_temperature = temperature_profile[0];\n  aReal temperature[N_POINTS];\n  for (int i = 0; i < N_POINTS; i++) {\n    temperature[i] = temperature_profile[i+1];\n  }\n\n  // The simulated radiances will be put here...\n  aReal sim_radiance[2];\n\n  // ...and compared to the observed radiances here with their 1-sigma\n  // error\n  Real obs_radiance[2] = {0.00189, 0.00140};\n  Real radiance_error = 2.0e-5;\n\n  // Start recording derivative information\n  s.new_recording();\n\n  // Simulate the radiances for the input surface temperature and\n  // atmospheric 
temperature\n  simulate_radiances_wrapper(N_POINTS, surface_temperature,\n\t\t\t     temperature, sim_radiance);\n\n  std::cout << \"Simulated radiances = \"\n\t    << sim_radiance[0].value() << \" \"\n\t    << sim_radiance[1].value() << \"\\n\";\n\n  // Compute a \"cost function\" (or \"penalty function\") expressing the\n  // sum of the squared number of error standard deviations the\n  // simulated radiances are from the observed radiances\n  aReal cost_function = 0.0;\n  for (int ichan = 0; ichan < 2; ichan++) {\n    cost_function\n      += (sim_radiance[ichan] - obs_radiance[ichan])\n       * (sim_radiance[ichan] - obs_radiance[ichan])\n      / (radiance_error*radiance_error);\n  }\n  \n  std::cout << \"Cost function = \" << cost_function << \"\\n\";\n\n  // We want the computed adjoints to be gradients of the cost\n  // function with respect to the surface temperature or atmospheric\n  // temperature\n  cost_function.set_gradient(1.0);\n\n  // Reverse-mode automatic differentiation\n  s.reverse();\n\n  // Extract the gradients  \n  Real dcost_dsurface_temperature = 0;\n  Real dcost_dtemperature[N_POINTS];\n  surface_temperature.get_gradient(dcost_dsurface_temperature);\n  adept::get_gradients(temperature, N_POINTS, dcost_dtemperature);\n\n\n  std::cout << \"d[cost_function]/d[surface_temperature] = \"\n\t    << dcost_dsurface_temperature << \"\\n\";\n  std::cout << \"d[cost_function]/d[temperature] =\";\n  for (int i = 0; i < N_POINTS; i++) {\n    std::cout << \" \" << dcost_dtemperature[i];\n  }\n  std::cout << \"\\n\";\n\n\n}\n"
  },
  {
    "path": "test/test_radiances_array.cpp",
    "content": "/* test_radiances.cpp - \"main\" function for Test 3\n\n  Copyright (C) 2012-2014 The University of Reading\n  Copyright (C) 2016      European Centre for Medium Range Weather Forecasts\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n*/\n\n#include \"adept_arrays.h\"\n#include \"simulate_radiances.h\"\n\nusing adept::Real;\nusing adept::aReal;\nusing adept::Vector;\nusing adept::aVector;\nusing adept::value;\n\n// This function provides an Adept interface to the simulate_radiances\n// function\nvoid simulate_radiances_wrapper(int n,\n\t\t\t\tconst aReal& surface_temperature,\n\t\t\t\tconst aVector& temperature,\n\t\t\t\taReal radiance[2]) {\n  // Create inactive (Real) versions of the active (aReal) inputs\n  Real st = adept::value(surface_temperature);\n  Vector t(n);\n  for (int i = 0; i < n; i++) { \n    t(i) = adept::value(temperature(i));\n  }\n  \n  // Declare variables to hold the inactive outputs and their Jacobians\n  Real r[2];\n  Real dr_dst[2];\n  Vector dr_dt(2*n);\n  \n   // Call the function with the non-Adept interface\n  simulate_radiances(n, st, &t[0], &r[0], dr_dst, &dr_dt[0]);\n  \n  // Copy the results into the active variables, but use set_value in order\n  // not to write any equivalent derivative statement to the Adept stack\n  radiance[0].set_value(r[0]);\n  radiance[1].set_value(r[1]);\n  \n  // Loop over the two radiances and add the derivative statements to\n  // the Adept stack\n  for (int i = 0; i < 2; ++i) {\n    // Add the first term on the right-hand-side of Equation 1 in the text\n    radiance[i].add_derivative_dependence(surface_temperature, dr_dst[i]);\n    // Now append the second term on the right-hand-side of Equation\n    // 1. 
The third argument \"n\" of the following function says that\n    // there are n terms to be summed, and the fourth argument \"2\"\n    // says to take only every second element of the Jacobian dr_dt,\n    // since the derivatives with respect to the two radiances have\n    // been interlaced.  If the fourth argument is omitted then\n    // relevant Jacobian elements will be assumed to be contiguous in\n    // memory.\n    for (int j = 0; j < n; ++j) {\n      radiance[i].append_derivative_dependence(temperature(j), dr_dt(i+j*2));\n    }\n  }\n\n  for (int i = 0; i < 2; ++i) {\n    std::cout << \"Channel \" << i << \"\\n\";\n    std::cout << \"d[radiance]/d[surface_temperature] = \" << dr_dst[i] << \"\\n\";\n    std::cout << \"d[radiance]/d[temperature] =\";\n    for (int j = 0; j < n; ++j) {\n      std::cout << \" \" << dr_dt[i+j*2];\n    }\n    std::cout << \"\\n\\n\";\n  }\n\n}\n\n\nint\nmain(int argc, char** argv)\n{\n  // Temperature (K) at 1000-m intervals from the mid-latitude summer\n  // standard atmosphere\n  static const int N_POINTS = 25;\n  static const Real temperature_profile[N_POINTS+1]\n    = {294.0, 290.0, 285.0, 279.0, 273.0, 267.0, 261.0, 255.0,\n       248.0, 242.0, 235.0, 229.0, 222.0, 216.0, 216.0, 216.0,\n       216.0, 216.0, 216.0, 217.0, 218.0, 219.0, 220.0, 222.0,\n       223.0, 224.0};\n\n  // Start the Adept stack\n  adept::Stack s;\n  \n  // Copy the temperature profile information into active variables\n  aReal surface_temperature = temperature_profile[0];\n  aVector temperature(N_POINTS);\n  for (int i = 0; i < N_POINTS; i++) {\n    temperature[i] = temperature_profile[i+1];\n  }\n\n  // The simulated radiances will be put here...\n  aReal sim_radiance[2];\n\n  // ...and compared to the observed radiances here with their 1-sigma\n  // error\n  Real obs_radiance[2] = {0.00189, 0.00140};\n  Real radiance_error = 2.0e-5;\n\n  // Start recording derivative information\n  s.new_recording();\n\n  // Simulate the radiances for the input 
surface temperature and\n  // atmospheric temperature\n  simulate_radiances_wrapper(N_POINTS, surface_temperature,\n\t\t\t     temperature, sim_radiance);\n\n  std::cout << \"Simulated radiances = \"\n\t    << sim_radiance[0].value() << \" \"\n\t    << sim_radiance[1].value() << \"\\n\";\n\n  // Compute a \"cost function\" (or \"penalty function\") expressing the\n  // sum of the squared number of error standard deviations the\n  // simulated radiances are from the observed radiances\n  aReal cost_function = 0.0;\n  for (int ichan = 0; ichan < 2; ichan++) {\n    cost_function\n      += (sim_radiance[ichan] - obs_radiance[ichan])\n       * (sim_radiance[ichan] - obs_radiance[ichan])\n      / (radiance_error*radiance_error);\n  }\n  \n  std::cout << \"Cost function = \" << cost_function << \"\\n\";\n\n  // We want the computed adjoints to be gradients of the cost\n  // function with respect to the surface temperature or atmospheric\n  // temperature\n  cost_function.set_gradient(1.0);\n\n  // Reverse-mode automatic differentiation\n  s.reverse();\n\n  // Extract the gradients  \n  Real dcost_dsurface_temperature = 0;\n  Vector dcost_dtemperature;\n  surface_temperature.get_gradient(dcost_dsurface_temperature);\n  adept::get_gradients(temperature, dcost_dtemperature);\n\n  std::cout << \"d[cost_function]/d[surface_temperature] = \"\n\t    << dcost_dsurface_temperature << \"\\n\";\n  std::cout << \"d[cost_function]/d[temperature] =\";\n  for (int i = 0; i < N_POINTS; i++) {\n    std::cout << \" \" << dcost_dtemperature[i];\n  }\n  std::cout << \"\\n\";\n\n\n}\n"
  },
  {
    "path": "test/test_reduce_active.cpp",
    "content": "/* test_reduce_active.cpp\n\n  Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n  This file tests reduce operations on active vectors\n*/\n\n#include <iostream>\n#include \"adept_arrays.h\"\n\nusing namespace adept;\n\n#define TEST_REDUCE(FUNC)\t\t\t\\\n  {\t\t\t\t\t\t\\\n    std::cout << \"\\nTESTING REDUCE FUNCTION \"\t\\\n\t      << #FUNC << \"\\n\";\t\t\t\\\n    stack.new_recording();\t\t\t\\\n    aReal J = FUNC(x);\t\t\t\t\\\n    Real Jp = FUNC(value(x));\t\t\t\\\n    J.set_gradient(1.0);\t\t\t\\\n    stack.reverse();\t\t\t\t\\\n    Vector dJdx = x.get_gradient();\t\t\\\n    std::cout << #FUNC << \"(x) = \"\t\t\\\n\t      << J << \"\\n\";\t\t\t\\\n    std::cout << #FUNC << \"(value(x)) = \"       \\\n\t      << Jp << \"\\n\";\t\\\n    std::cout << \"d(\" << #FUNC << \"(x))/dx = \"\t\\\n              << dJdx << \"\\n\";\t\t\t\\\n    if (J != Jp) { ++status; }\t\t        \\\n    stack.print_statements();\t\t\t\\\n  }\n\n\nint\nmain(int argc, const char** argv)\n{\n  Stack stack;\n\n  aVector x(5);\n  x << -2.0, -3.0, -1.0, -50.0, 7.0;\n\n  std::cout << \"x = \" << x << \"\\n\";\n\n  int status = 0;\n\n  TEST_REDUCE(sum);\n  TEST_REDUCE(mean);\n  TEST_REDUCE(maxval);\n  TEST_REDUCE(minval);\n  TEST_REDUCE(product);\n  TEST_REDUCE(norm2);\n\n  // Test product by hand\n  {\n    std::cout << \"\\nTESTING MANUAL PRODUCT\\n\";\n    stack.new_recording();\n    //aReal J = x(0)*x(1)*x(2)*x(3)*x(4);\n    aReal J = x(0)*x(1);\n    J *= x(2);\n    J *= x(3);\n    J *= x(4);\n    J.set_gradient(1.0);\n    stack.reverse();\n    Vector dJdx = x.get_gradient();\n    std::cout << \"manual_product(x) = \" << J << \"\\n\";\n    std::cout << \"d(manual_product(x))/x = \" << dJdx << \"\\n\";\n    
stack.print_statements();\n  }\n\n  // Test norm2 by hand\n  {\n    std::cout << \"\\nTESTING MANUAL NORM2\\n\";\n    stack.new_recording();\n    aReal J = sqrt(x(0)*x(0) + x(1)*x(1) + x(2)*x(2)\n\t\t   + x(3)*x(3) + x(4)*x(4));\n    J.set_gradient(1.0);\n    stack.reverse();\n    Vector dJdx = x.get_gradient();\n    std::cout << \"manual_norm2(x) = \" << J << \"\\n\";\n    std::cout << \"d(manual_norm2(x))/x = \" << dJdx << \"\\n\";\n    stack.print_statements();\n  }\n\n  if (status != 0) {\n    std::cout << \"Error: \" << status << \" of the active/passive reduce operations are different\\n\";\n  }\n\n  return status;\n}\n\n"
  },
  {
    "path": "test/test_thread_safe.cpp",
    "content": "/* test_thread_safe.cpp - Tests that Adept is thread-safe\n\n  Copyright (C) 2012-2014 The University of Reading\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n  This program tests the thread-safety of the Adept library: compile\n  with and without ADEPT_STACK_THREAD_UNSAFE defined, and run with\n  -serial and -parallel command-line arguments.  It should crash only\n  if ADEPT_STACK_THREAD_UNSAFE is defined AND -parallel is selected.\n*/\n\n#include <iostream>\n#include <string>\n\n#ifdef _OPENMP\n#include <omp.h>\n#endif\n\n// Test what happens if thread safety is disabled by uncommenting the\n// following\n//#define ADEPT_STACK_THREAD_UNSAFE 1\n#include \"adept.h\"\n\nusing adept::adouble;\nusing adept::Real;\n\n// Number of points in spatial grid of simulation\n#define NX 128\n\n// \"Toon\" advection scheme applied to linear advection in a 1D\n// periodic domain - see Adept paper for details\nstatic\nvoid\ntoon(int nt, double c, const adouble q_init[NX], adouble q[NX]) {\n  adouble flux[NX-1];                        // Fluxes between boxes\n  for (int i=0; i<NX; i++) q[i] = q_init[i]; // Initialize q\n  for (int j=0; j<nt; j++) {                 // Main loop in time\n    for (int i=0; i<NX-1; i++) flux[i] = (exp(c*log(q[i]/q[i+1]))-1.0) \n                                         * q[i]*q[i+1] / (q[i]-q[i+1]);\n    for (int i=1; i<NX-1; i++) q[i] += flux[i-1]-flux[i];\n    q[0] = q[NX-2]; q[NX-1] = q[1];          // Treat boundary conditions\n  }\n}\n\n// Perform a simulation and compute the Jacobian two ways - this is to\n// be run in parallel to test thread safety\nstatic\nbool\ncompute(int i, int nt, double dt, double q_init_save[NX])\n{\n  bool error_occurred = false; // Return value\n\n  // Start an Adept stack before the first adouble object is\n  // 
constructed\n  adept::Stack s;\n\n  adouble q_init[NX];  // Initial values of field as adouble array\n  adouble q[NX];       // Final values \n  \n  // Copy initial values\n  for (int j = 0; j < NX; j++) {\n    q_init[j] = q_init_save[j];\n  }\n\n  // Do something to the data specific to the loop\n  q_init[i+5] = q_init[i+5] + 1.0;\n\n  // Start a new recording of derivative statements; note that this\n  s.new_recording();\n\n  // Run the simulation with nt timesteps\n  toon(nt, dt, q_init, q);\n\n  s.independent(q_init, NX); // Declare independents\n  s.dependent(q, NX);        // Declare dependents\n  Real jac_for[NX*NX];       // Where Jacobian will be stored from forward computation\n  Real jac_rev[NX*NX];       // Where Jacobian will be stored from reverse computation\n  // Compute Jacobian two ways\n  s.jacobian_forward(jac_for);\n  s.jacobian_reverse(jac_rev);\n    \n  double rmsd = 0.0;\n  for (int j = 0; j < NX*NX; j++) {\n    if (jac_for[j] != jac_rev[j]) {\n      double diff = jac_for[j]-jac_rev[j];\n      rmsd += diff*diff;\n    }\n    }\n  rmsd = sqrt(rmsd / (NX*NX));\n    \n#pragma omp critical\n  {\n    std::cout.flush();\n    \n#ifdef _OPENMP\n    std::cout << \"*** Iteration \" << i << \" executed by thread \" << omp_get_thread_num() \n\t      << \" (stack address \" << adept::active_stack() << \"):\\n\";\n#else\n    std::cout << \"*** Iteration \" << i \n\t      << \" (stack address \" << adept::active_stack() << \"):\\n\";\n#endif\n      \n    std::cout << \"Used maximum of \" << s.max_jacobian_threads() << \" thread(s) for Jacobian calculation\\n\";\n    \n    if (rmsd > 1.0e-5) {\n      std::cout << \"*** ERROR: Jacobian from forward and reverse computations disagree (RMSD = \"\n\t\t<< rmsd << \")\\n\";\n      error_occurred = true;\n    }\n    else {\n      std::cout << \"CORRECT BEHAVIOUR: Jacobians from forward and reverse computations agree within tolerance\\n\";\n    }\n    \n    if (i == 0) {\n      // Print information about the data 
held in the stack\n\tstd::cout << \"Stack status for iteration 0:\\n\"\n\t\t  << s;\n\t// Print memory information\n\tstd::cout << \"Memory usage: \" << s.memory() << \" bytes\\n\\n\";\n    }\n    \n    std::cout << \"\\n\";\n  }\n  \n  return error_occurred;\n}\n\n\nint\nmain(int argc, char** argv)\n{\n  using adept::adouble;\n\n  bool error_occurred = false;\n\n  const double pi = 4.0*atan(1.0);\n\n  // Edit these variables to change properties of simulation\n  const int nt = 200;        // Number of timesteps\n  const double dt = 0.125;   // Timestep (actually a Courant number)\n  const int ncomputations = 8;\n\n  // Initial values of field as a double array\n  double q_init_save[NX];\n\n  bool is_parallel = true;\n\n  if (argc > 1) {\n    if (std::string(\"-serial\") == argv[1]) {\n      is_parallel = false;\n    }\n    else if (std::string(\"-parallel\") == argv[1]) {\n      is_parallel = true;\n    }\n    else {\n      std::cout << \"Usage: \" << argv[0] << \" [-serial|-parallel]\\n\";\n      return 1;\n    }\n  }\n\n\n  std::cout << \"Running \" << argv[0] << \"...\\n\";\n  \n#ifdef ADEPT_STACK_THREAD_UNSAFE\n  std::cout << \"  Compiled to be THREAD UNSAFE\\n\";\n#else\n  std::cout << \"  Compiled to be THREAD SAFE\\n\";\n#endif\n\n#ifdef _OPENMP\n  std::cout << \"  \" << omp_get_num_procs() << \" processors available running maximum of \"\n\t    << omp_get_max_threads() << \" threads\\n\";\n  if (is_parallel) {\n    std::cout << \"  Performing \" << ncomputations << \" parallel computations,\\n\";\n    std::cout << \"    within which Jacobian (\" << NX << \"x\" << NX << \" matrix) calculations will be serial\\n\";\n#ifdef ADEPT_STACK_THREAD_UNSAFE\n    if (omp_get_max_threads() > 1) {\n      std::cout << \"*** You should expect this program to crash now!\\n\";\n    }\n#endif\n  }\n  else {\n    std::cout << \"  Performing \" << ncomputations << \" serial computations,\\n\";\n    std::cout << \"    within which Jacobian (\" << NX << \"x\" << NX << \" matrix) 
calculations will be in parallel\\n\";\n  }\n#else\n  std::cout << \"  Compiled with no OpenMP support\\n\";\n#endif\n\n  std::cout << \"\\n\";\n  std::cout.flush();\n\n\n  // Initialize the field\n  for (int i = 0; i < NX; i++) {\n    q_init_save[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+0.0001;\n  }\n\n  if (is_parallel) {\n#pragma omp parallel for\n    for (int i = 0; i < ncomputations; i++) {\n      if (compute(i, nt, dt, q_init_save)) {\n\terror_occurred = true;\n      }\n    }\n  }\n  else {\n    for (int i = 0; i < ncomputations; i++) {\n      if (compute(i, nt, dt, q_init_save)) {\n\terror_occurred = true;\n      }\n    }\n  }\n\n  if (error_occurred) {\n    std::cout << \"An error occurred\\n\";\n  }\n\n  return error_occurred;\n    \n}\n"
  },
  {
    "path": "test/test_thread_safe_arrays.cpp",
    "content": "/* test_thread_safe_arrays.cpp - Tests that Adept arrays are thread-safe\n\n  Copyright (C) 2017 ECMWF\n\n  Copying and distribution of this file, with or without modification,\n  are permitted in any medium without royalty provided the copyright\n  notice and this notice are preserved.  This file is offered as-is,\n  without any warranty.\n\n*/\n\n#ifdef _OPENMP\n#include <omp.h>\n#endif\n\n//#define ADEPT_STORAGE_THREAD_SAFE 1\n\n#include <adept_arrays.h>\n\nint main(int argc, const char** argv)\n{\n  using namespace adept;\n\n  int N = 2;\n  Matrix A(N,N);\n  SymmMatrix S(N);\n\n  Matrix B;\n  SymmMatrix T;\n#ifdef ADEPT_STORAGE_THREAD_SAFE\n  std::cout << \"Storage should be thread safe\\n\";\n  // B shares the data and increases the reference counter of the\n  // shared Storage object. If A goes out of scope, B will \"steal\" the\n  // data.\n  B >>= A;\n  T >>= S;\n#else\n  std::cout << \"Storage is not thread safe: using soft_link()\\n\";\n  // B points to the data but does not have access to the Storage\n  // object. 
If A goes out of scope, B will most likely point to an\n  // inaccessible memory location.\n  B >>= A.soft_link();\n  T >>= S.soft_link();\n#endif\n\n  A = 1.0; // Also seen by B\n  S = 2.0; // Also seen by T\n\n  int nthreads = 1;\n\n#ifdef _OPENMP\n  nthreads = omp_get_max_threads();\n  std::cout << omp_get_num_procs() << \" processors available running maximum of \"\n\t    << nthreads << \" threads\\n\";\n#else\n  std::cout << \"Compiled without OpenMP support: 1 thread\\n\";\n#endif\n\n  // The following almost always causes a crash if the code is not\n  // properly thread safe\n#pragma omp parallel for\n  for (int i = 0; i < N*1000; ++i) {\n\n    for (int j = 0; j < N*1000; ++j) {\n      B[j % N] = noalias(B(__, j % N)) + T.diag_vector();\n    }\n\n  }\n\n  if (nthreads > 1) {\n    std::cout << \"Parallel subsetting of array zillions of times was successful\\n\";\n  }\n  else {\n    std::cout << \"Serial subsetting of array zillions of times was successful (unsurprisingly)\\n\";\n  }\n\n  return 0;\n\n}\n"
  }
]