Repository: rjhogan/Adept-2 Branch: master Commit: d0a7751a0871 Files: 136 Total size: 1.5 MB Directory structure: gitextract_yk4ax793/ ├── .gitignore ├── .travis.yml ├── AUTHORS ├── COPYING ├── ChangeLog ├── INSTALL ├── Makefile.am ├── NEWS ├── README.md ├── TODO ├── adept/ │ ├── Array.cpp │ ├── Makefile.am │ ├── Minimizer.cpp │ ├── Stack.cpp │ ├── StackStorageOrig.cpp │ ├── Storage.cpp │ ├── cppblas.cpp │ ├── cpplapack.h │ ├── index.cpp │ ├── inv.cpp │ ├── jacobian.cpp │ ├── line_search.cpp │ ├── minimize_conjugate_gradient.cpp │ ├── minimize_levenberg_marquardt.cpp │ ├── minimize_limited_memory_bfgs.cpp │ ├── settings.cpp │ ├── solve.cpp │ └── vector_utilities.cpp ├── benchmark/ │ ├── Makefile.am │ ├── advection_schemes.h │ ├── advection_schemes_AD.h │ ├── advection_schemes_K.h │ ├── animate.cpp │ ├── autodiff_benchmark.cpp │ ├── differentiator.h │ ├── math_benchmark.cpp │ ├── matrix_benchmark.cpp │ └── nx.h ├── config_platform_independent.h.in ├── configure.ac ├── doc/ │ ├── COPYING │ ├── Makefile │ ├── README │ ├── adept_documentation.tex │ └── adept_reference.tex ├── include/ │ ├── Makefile.am │ ├── Timer.h │ ├── adept/ │ │ ├── Active.h │ │ ├── ActiveConstReference.h │ │ ├── ActiveReference.h │ │ ├── Allocator.h │ │ ├── Array.h │ │ ├── ArrayWrapper.h │ │ ├── BinaryOperation.h │ │ ├── Expression.h │ │ ├── ExpressionSize.h │ │ ├── FixedArray.h │ │ ├── GradientIndex.h │ │ ├── IndexedArray.h │ │ ├── Minimizer.h │ │ ├── Optimizable.h │ │ ├── Packet.h │ │ ├── RangeIndex.h │ │ ├── ScratchVector.h │ │ ├── SpecialMatrix.h │ │ ├── Stack.h │ │ ├── StackStorage.h │ │ ├── StackStorageOrig.h │ │ ├── StackStorageOrigStl.h │ │ ├── Statement.h │ │ ├── Storage.h │ │ ├── UnaryOperation.h │ │ ├── array_shortcuts.h │ │ ├── base.h │ │ ├── contiguous_matrix.h │ │ ├── cppblas.h │ │ ├── eval.h │ │ ├── exception.h │ │ ├── interp.h │ │ ├── inv.h │ │ ├── matmul.h │ │ ├── noalias.h │ │ ├── outer_product.h │ │ ├── quick_e.h │ │ ├── reduce.h │ │ ├── scalar_shortcuts.h │ │ ├── settings.h │ 
│ ├── solve.h │ │ ├── spread.h │ │ ├── store_transpose.h │ │ ├── traits.h │ │ ├── vector_utilities.h │ │ └── where.h │ ├── adept.h │ ├── adept_arrays.h │ ├── adept_fortran.h │ ├── adept_optimize.h │ └── create_adept_source_header ├── m4/ │ ├── adept.m4 │ ├── ax_blas.m4 │ ├── ax_lapack.m4 │ ├── ltsugar.m4 │ └── lt~obsolete.m4 ├── makefile_include.in └── test/ ├── Makefile ├── README ├── algorithm.cpp ├── algorithm.h ├── algorithm_with_and_without_ad.h ├── rosenbrock_banana_function.cpp ├── run_tests.sh ├── simulate_radiances.cpp ├── simulate_radiances.h ├── state.cpp ├── state.h ├── test_adept.cpp ├── test_adept_with_and_without_ad.cpp ├── test_array_derivatives.cpp ├── test_array_speed.cpp ├── test_arrays.cpp ├── test_checkpoint.cpp ├── test_constructors.cpp ├── test_derivatives.cpp ├── test_fastexp.cpp ├── test_fixed_arrays.cpp ├── test_gsl_interface.cpp ├── test_interp.cpp ├── test_minimizer.cpp ├── test_misc.cpp ├── test_no_lib.cpp ├── test_packet_operations.cpp ├── test_radiances.cpp ├── test_radiances_array.cpp ├── test_reduce_active.cpp ├── test_thread_safe.cpp └── test_thread_safe_arrays.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ Makefile.in /aclocal.m4 /config.guess /config.h.in /config.log /config.sub /config.status /configure /depcomp /install-sh /ltmain.sh /missing /ar-lib /autom4te.cache /compile /libtool /stamp-* *.o *.a *.so *.la *.tar* doc/adept_*.log doc/adept_*.toc doc/adept_*.aux doc/adept_*.out .deps *~ Makefile !test/Makefile !doc/Makefile include/adept_source.h ================================================ FILE: .travis.yml ================================================ language: cpp os: linux sudo: required dist: trusty compiler: - gcc before_install: - sudo apt-get install gfortran -y - type gfortran install: autoreconf -i && ./configure && make -j8 
script: - make check -j8 - cat test/test_results.txt ================================================ FILE: AUTHORS ================================================ Robin Hogan ================================================ FILE: COPYING ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
================================================ FILE: ChangeLog ================================================ version 2.1.4 (in progress) - Added support for the copysign function - Added aArray::set_gradient(Array) function version 2.1.3 (22 Feb 2024) - Added interp2d and interp3d interpolation functions - Added option of nearest-neighbour interpolation version 2.1.2 (3 Oct 2023) - Further bug fixes to reduction of active arrays which did not have adequate space allocated by check_space, including "product" which requires an additional differential operation per element - Fixed out-of-bounds access in test_thread_safe_arrays - Slight change to reduce_dimension to avoid incorrect warning about ExpressionSize array subscript of -1 - Fixed broken benchmark/autodiff_benchmark to work with ADOL-C - Changed COMPILE_FLAGS argument order in test/Makefile in case CPPFLAGS contains Timer.h or other conflicting header file - Added benchmark/math_benchmark program version 2.1.1 (10 April 2022) - interp function can perform 1D interpolation of higher dimensional Y arrays - Bug fix in reduction of an "n" dimensional active array to an "n-1" dimensional array: check_space had been forgotten - Added Newton-Levenberg[-Marquardt] options to test_minimizer, which use the exact Hessian of the Rosenbrock banana function version 2.1 (5 February 2021) - Removed README in favour of README.md version 2.0.9 (28 January 2021) - Fix bug in Array::alignment_offset causing occasional crashes in reduce and assign operations due to unaligned AVX access, now tested in test_packet_operations - Added Conjugate-Gradient and L-BFGS minimization methods, both bounded and unbounded methods - Disabled vectorization on 32-bit ARM NEON targets as there are insufficient floating-point intrinsics - Fixed interp(x,y,xi) function in case x and y have 0 or 1 elements version 2.0.8 (22 August 2020) - Added adept_optimize.h header file providing minimization capability, initially with the constrained and
unconstrained Levenberg-Marquardt minimization algorithm - Test program test_minimizer tests with the N-dimensional Rosenbrock function - The Stack member function "jacobian" can now operate on or return Adept matrices, rather than solely on raw pointers which had to point to data in column-major order - Removed "using namespace internal" from several header files so that adept namespace is clean - Fixed C++98 compatibility version 2.0.7 (23 June 2020) - Added fast, vectorizable exponential function "fastexp", or can use as adept::exp if the ADEPT_FAST_EXPONENTIAL preprocessor variable is defined - Moved all the vector intrinsic stuff to quick_e.h - Added ARM-NEON support to quick_e.h - Adept is now thread safe on Mac OS versions that support the thread_local keyword - Fixed bug that caused incorrect differentiation of Active/int - Preprocessor options ADEPT_INIT_REAL_SNAN and ADEPT_INIT_REAL_ZERO initialize real numbers (and complex numbers) to signaling NaN or zero, useful for debugging - Fixed bug that caused incorrect result of maxval and minval applied to active arrays - Fixed bug that caused incorrect differentiation of "product" function - Fixed bug that caused incorrect norm2 for passive vector large enough to use vectorization version 2.0.6 (20 February 2020) - Fixed bug in hand-coded adjoint of Toon advection scheme (benchmark/advection_schemes_AD.h), as well as other bugs that would have prevented the Adjoint and hand-coded adjoints from being correct compared to each other - Fixed memory leak in Packet.h by ensuring memory is freed in the case that neither _POSIX_VERSION nor _MSC_VER are defined - Fixed bug in FixedArray.h that prevented active fixed arrays from registering themselves with the stack when initialized using an initializer list - Fixed missing "template" directives in UnaryOperation.h that prevented isfinite, isnan and isinf from working correctly on arrays - Added Array::resize_contiguous functions - minval and maxval now work correctly
with negative and +/-Inf arguments; previously minval gave incorrect results even for negative arguments - Added adept_fortran.h to provide the ability to exchange arrays between C++/Adept and Fortran, for those Fortran compilers that support the 2018 standard - Added support for AVX512 vectorization: operations on 16 floats and 8 doubles at a time - Added test_packet_operations to check that Intel vector intrinsics are correctly implemented version 2.0.5 (6 February 2018) - Use set_array_print_style(x) to set behaviour of <=4.9.1 and Clang if appropriate built-in is present; can't guarantee its presence with other compilers - Fix writing of active scalar expressions to a stream - Added missing fmin/fmax(Expr,Scalar) version 2.0.4 (8 January 2018) - Packet.h copes with undefined _mm_undefined_ps in GCC<4.9.1 - Fix Packet.h in case SSE2 not enabled - ADEPT_FAST preprocessor variable enables ADEPT_NO_DIMENSION_CHECKING, ADEPT_NO_ALIAS_CHECKING and ADEPT_STACK_THREAD_UNSAFE - Divide by scalar now only converts to multiply by (1.0/scalar) if scalar is of floating-point type; this fixes indexing with "end/2" - Fix bug in Packet.h (found by valgrind) to ensure new[] followed by delete[] and posix_memalign followed by free - Increase initial stack size from 1000 to 1024^2 - Fixed two bugs in IndexedArray.h that broke indexing a matrix with Matrix(int,intVector) - Allocated memory in non-OpenMP jacobian_forward is now freed version 2.0.3 (28 October 2017) - Replaced template class "cast" with "expr_cast" to avoid clash with Expression's non-template member function; this enables compilation with Visual C++. - Added adept::have_matrix_multiplication() and adept::have_linear_algebra() to test for BLAS and LAPACK (respectively) at run-time version 2.0.2 (21 October 2017) - Fixed standards-compliance problem with use of Expression in Curiously Recurring Template Pattern, by removing any "static const" members that referred to the derived class.
This enabled the same code to work with g++, clang++ and the Intel compiler icc. version 2.0.1 (18 October 2017) - Basic passive complex arrays work, tested with test/test_complex_arrays - Added ADEPT_NO_DIMENSION_CHECKING option - Vectorized sqrt, unary-, unary+, max and min - Removed the option to vectorize with Packet representing a *pair* of SSE2/AVX packed vectors; now a Packet can only represent a single packed vector. This simplifies maintenance of Packet.h, and the pair option offered no performance advantage anyway. - Vectorized reduce operations sum, product etc. - Many fixes to enable compilation with clang++ - Fixed FixedArray::operator[] for rank>1 version 2.0 (September 2017) - Finalized version for release - PDF documentation is no longer installed, so that Git users are not obliged to have pdflatex version 1.9.11 (30 September 2017) - Fixed get_gradient member function of Array and FixedArray - Added test_array_derivatives test program - Fixed indexing of FixedArrays of rank>1 - Fixed IndexedArray applied to FixedArrays (previously had a reference to a temporary dimension object) - Test and benchmarking programs now work with single precision - Stack functions accept Index passed by value rather than reference, so that "static const int" passed from FixedArray does not need to be explicitly instantiated - Active::add_derivative_dependence and append_derivative_dependence no longer only accept arguments of type "Real" - ADEPT_STORAGE_THREAD_SAFE option to protect Storage reference counter in multi-threaded environment (C++11 only) - Added Array::soft_link() as another means to get thread safety - Added test program test_thread_safe_arrays - Added adept_reference latex file to doc directory - Added "dimensions" function for creating ExpressionSize objects version 1.9.10 (25 September 2017) - Added link syntax A >>= B - Added assignment and initialization from initializer_lists for Array and FixedArray classes - Implemented Fortran-like "count" reduction
function - Bug fix sending active expression to a stream with "<<" - Added "spread(array,n)" to match Fortran spread(array,dim,n) - Added outer_product(x,y) - Fixed adept_source.h for non-Unix systems - Moved mathematical functions from global to adept namespace - Fixed pausable recording and added test_adept_active_pausable - Removed unsafe ADEPT_COPY_CONSTRUCTOR_ONLY_ON_RETURN_FROM_FUNCTION - C++98 and C++11 correctly take cmath functions from :: and std:: respectively - "make check" now runs test script test/run_tests.sh - inv and solve now take general expression arguments - Enabled indexed arrays to be assigned to an initializer list - BLAS now optional (without it matrix multiplication causes run-time exception) - Added test_derivatives to test quality of derivatives for all mathematical functions - Enabled SpecialMatrix and IndexedArray to be assigned to an active scalar expression - Added fmax and fmin functions (even if C++11 not used) - Added atan2 support - C++11 on non-Mac platforms uses thread_local keyword instead of C++98 compiler extensions - Matrix multiplication on active special matrices implemented by copying them to a dense Array<2,Real,true>. Very inefficient, but it works. - Matrix multiplication on inactive triangular and "square" matrices now works by converting them to a dense Array<2,Real,false>.
- Added alias detection in IndexedArray - Alias detection in IndexedArray and SpecialMatrix can be deactivated with ADEPT_NO_ALIAS_CHECKING - Added "eval" function to evaluate an expression that might be subject to aliasing version 1.9.9 (August 2017) - Put on GitHub as rjhogan/Adept-2 - Added Expression::next_value_contiguous for faster inner loops in the case that all expressions have a contiguous and increasing inner dimension - Preliminary vectorization via Packet class and Expression::next_packet - Vectorized forward Jacobian calculation using packets - Split Expression.h into Expression.h, UnaryOperation.h and BinaryOperation.h - Fixed bug in matmul.h that causes failure if matrix in matrix-vector multiplication is strided in both dimensions - Added move semantics if C++11 enabled version 1.9.8 (April 2016): - Completed FixedArray.h and tested for active arguments - Added array_shortcuts for FixedArrays: (a)VectorX, (a)MatrixXX - Added array_shortcuts for Arrays: (a)ArrayXD (for X = 3 to 7) - interp permits general Expression arguments version 1.9.7 (April 2016): - Nearly completed FixedArray.h version 1.9.6 (March 2016): - Started FixedArray.h version 1.9.5 (March 2016): - Fixed add_derivative_dependence and append_derivative_dependence when applied to elements of arrays - Added ADEPT_BOUNDS_CHECKING capability, and fixed IndexedArray to work with this - Now call BLAS and LAPACK (Fortran) routines, rather than C-BLAS and LAPACKE functions - Added matrix multiplication benchmark program - Added IndexedArray for dimensions up to 7 - Added Array::data() and Array::const_data() for direct access - Added Array::subset(); slightly more concise than using "range" version 1.9.4 (January 2016): - Completed changes to documentation in doc directory - Added control/inquiry of settings, e.g.
set_max_blas_threads() and configuration() version 1.9.3 (December 2015): - Added "max" and "min" as binary operators (note that "maxval" and "minval" are reduction operators as in Fortran) version 1.9.2 (December 2015): - Added ActiveConstReference type for active constant references version 1.9.1 (November 2015): - New matmul.h/matmul.cpp - not yet complete version 1.9.0 (November 2015): - SUBSTANTIAL REWRITE TO INCORPORATE ARRAY FUNCTIONALITY version 1.1 (June 2015): - Added ./configure script using autotools - Added support for additional mathematical functions: asinh, acosh, atanh, expm1, log1p, cbrt, erf, erfc, exp2, log2 - Changed license from GNU General Public License to Apache License, Version 2.0 - Jacobian calculation uses OpenMP parallelization - Removed multiscatter example code - New benchmarking program in benchmark/ that compares to other automatic differentiation tools if available - Fixed bug so that gaps in the gradient list now merge properly - Provided capability to compile code without an external library, to facilitate porting to Windows - Added programs in test/ demonstrating checkpointing, thread-safety and compiling without an external library version 1.0 (September 2013): - Very many internal changes and added features - Detailed documentation in the doc/ directory - Removed the LIFO requirement on the order with which aReal objects ought to be created and destroyed - For users of version 0.9, the main change to the interface is that the Stack::start() member function is no longer supported; rather you should call the Stack::new_recording() member function *after* the independent variables have been initialized but *before* any mathematical operations are performed using them version 0.9: - First public release ================================================ FILE: INSTALL ================================================ Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 
2005, 2006, 2007 Free Software Foundation, Inc. This file is free documentation; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. 
Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package. 4. Type `make install' to install the programs and any data files and documentation. 5. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 6. Often, you can also type `make uninstall' to remove the installed files again. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. 
After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX'. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. 
Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. 
In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of the options to `configure', and exit. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. 
================================================ FILE: Makefile.am ================================================ dist_pkgdata_DATA = README.md pkgdata_DATA = COPYING ChangeLog NEWS AUTHORS SUBDIRS = adept include benchmark test # The test/ directory does not use automake so we need to specify the # files that will be included in the distribution EXTRA_DIST = test/Makefile test/README test/*.cpp test/*.h test/run_tests.sh \ doc/Makefile doc/README doc/COPYING doc/*.tex ACLOCAL_AMFLAGS = -I m4 ================================================ FILE: NEWS ================================================ version 2.0 - Fixed pausable recording and library-free compilation to provide full backwards compatibility with version 1.1 - C++11 features such as initializer lists - Automatic vectorization of passive array statements if possible - Additional mathematical functions: round, trunc, rint, nearbyint, atan2, fmin, fmax - Additional array operations: spread, outer_product, count, maxval, minval, reshape - Many more test programs version 1.9.8 (April 2016) - First beta release of version 2.0 incorporating array capability up to 7 dimensions - Matrix multiplication and basic linear algebra from BLAS and LAPACK - Options for thread-safe accessing of arrays version 1.1 (June 2015) - Added ./configure script - Added support for additional mathematical functions: asinh, acosh, atanh, expm1, log1p, cbrt, erf, erfc, exp2, log2 - License changed to Apache License, Version 2.0 ================================================ FILE: README.md ================================================ # Adept 2: Combined array and automatic differentiation library in C++ ## Introduction The Adept version 2.1 software library provides three different functionalities: * Its automatic differentiation capability enables algorithms written in C++ to be differentiated with little code modification, very useful for a wide range of applications that involve mathematical optimization.
It is backwards compatible with and as fast as Adept 1.1. The name "Adept" refers to "Automatic Differentiation using Expression Templates". * Its array capability provides support for vectors, matrices, arrays of up to 7 dimensions and linear algebra. Adept 2 uses a single expression-template framework under the hood to enable array operations to be differentiated with very good computational performance. * Its optimization capability provides the various minimization algorithms (Levenberg, Levenberg-Marquardt, Conjugate Gradient and Limited Memory BFGS) each of which can be used with or without box constraints on the state variables. The interface to the optimization functionality is in terms of Adept vectors and matrices. If you are not interested in the array or optimization capabilities of Adept 2 then Adept 1.1 may be more to your liking as a very lightweight library that has virtually all the automatic-differentiation capabilities of version 2. ## Documentation and links * The [Adept web site](http://www.met.reading.ac.uk/clouds/adept/) for formal Adept releases * The [Adept-2 GitHub page](https://github.com/rjhogan/Adept-2) for the latest snapshot * The [Adept-1.1 GitHub page](https://github.com/rjhogan/Adept) for the older (scalar) library * A detailed [User Guide](http://www.met.reading.ac.uk/clouds/adept/adept_documentation.pdf) * A paper describing the automatic differentiation capability: [Hogan, R. J., 2014: Fast reverse-mode automatic differentiation using expression templates in C++. *ACM Trans. Math. 
Softw.* **40,** 26:1-26:16](http://www.met.reading.ac.uk/~swrhgnrj/publications/adept.pdf) * The [Adept Wikipedia page](https://en.wikipedia.org/wiki/Adept_(C++_library)) * Bug fixes, and queries not answered by the documentation, should be addressed to Robin Hogan (r.j.hogan at ecmwf.int) ## Installation To build Adept from a GitHub snapshot, first do the following to recreate the configure script (requiring the autotools package): autoreconf -i Formal release packages already contain a configure script. The normal build sequence is then: ./configure make make check make install Please consult the User Guide for further installation options; in particular, if you plan to make serious use of matrix multiplication and linear algebra then you should compile Adept to use an optimized BLAS library such as OpenBLAS. ## License and copyright The code in this package has a mix of copyright owners: Copyright (C) 2012-2015 University of Reading Copyright (C) 2015- European Centre for Medium-Range Weather Forecasts Two licenses are used for the code in this package: * The files that form the Adept library are distributed under the conditions of the Apache License, Version 2 - see the COPYING file for details. This is a permissive free-software license but one that does impose a few conditions if you intend to distribute derivative works. The files this license applies to are those in the include/ and adept/ directories, and the subdirectories below them. * All code in the test/ and benchmark/ directories is subject to the terms of the GNU all-permissive license, given at the top of those files - basically you can do what you like with the code from these files. If you use Adept in published scientific work then it is requested that you cite the Hogan (2014) paper above, but this is not a condition of the license.
================================================ FILE: TODO ================================================ BUGS spread function does not use the right DIM DESIRABLE BUT NEEDS NEW STACK Differentiated BLAS operations on symmetric matrices etc Implement general OpenMP for forward pass OPTIMIZATION Vectorize active expressions Fix vectorization of spread and outer_product by storing pointer to start of row and not using index Communicate band diagonals statically to optimize Array = band expression (e.g. 2*TridiagMatrix) Implement active scalar precomputation Optimize reciprocal to use 1.0 or 1.0f; vectorize Optimize storage of data range SquareMatrix::is_vectorizable = true FEATURES long double calls double matmul functions? std::string configuration function returning options for this compilation unit Mathematical functions copysign, fdim, hypot, remainder? Implement user elemental function Implement user choice of Jacobian array ordering Clean-up benchmark and test_arrays/test_array_speed code Check can do Array<*,Active,false> Rename ExpressionSize Enable functions taking ExpressionSize arguments (e.g. resize and array constructor) to take equivalent arguments, e.g. std::vector, initializer lists etc Fall-back if BLAS not available Implement pow and sqr Implement non-member functions merge?, reshape, shape?, size, [un]pack(?), minloc, maxloc Implement matlab-like tile (generic repmat) plus zeros and ones Implement iterators Triangular/symmetric views Const link does not increment reference counter Cannot link non-const to const either by construction or explicit link Should reduce functions take dimensions as template arguments? reduce operations have a template version with the reduce dimension provided statically differentiate complex number operations matmul and solve on complex numbers complex functions arg, abs, real, imag etc CHECK Check Square matmul All vectorization combinations work, e.g. 
double/int, aligned/unaligned LHS Set whole arrays as independent/dependent Reduce RMS difference in Toon case CLEAN References to OpenMP for array operations - remove? DOCUMENTATION Document diag_vector non-member function (in reduce.h) and test in test_arrays OLDER IDEAS Clarify vector orientation when in matrix multiplication Vector orientation changed with row(), col()? Implement move semantics and make copy constructors do deep copy ADEPT_*** Implement OpenMP passive array operations Implement OpenMP active array operations Link can only be performed on empty object If new Expression types are to be added, they should provide the following interface: static const int rank_ = 0; static const int n_scratch_ = 0; static const int n_active_ = 0; static const int n_arrays_ = 0; static const bool is_active_ = false; static const bool is_vectorizable_ = true; bool get_dimensions_(ExpressionSize<0>& dim) const; std::string expression_string_() const; bool is_aliased_(const Type* mem1, const Type* mem2) const; Type value_with_len_(const Index& j, const Index& len) const; template void advance_location_(ExpressionSize& loc) const; template Type value_at_location_(const ExpressionSize& loc) const; template Packet packet_at_location_(const ExpressionSize& loc) const; template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const; template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const; template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const; template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, const MyType& multiplier) const; template void set_location_(const ExpressionSize& i, ExpressionSize& index) const; ================================================ FILE: adept/Array.cpp ================================================ /* Array.cpp -- Functions and global variables controlling array behaviour Copyright 
(C) 2015-2016 European Centre for Medium-Range Weather Forecasts

   Robin Hogan

   This file is part of the Adept library.
*/

// NOTE(review): the header name of the #include below is missing in
// this copy of the file; restore it from the repository.
#include

namespace adept {
namespace internal {
bool array_row_major_order = true;
// bool array_print_curly_brackets = true;

// Variables describing how arrays are written to a stream. The
// initial values below are the same ones that
// set_array_print_style(PRINT_STYLE_CURLY) assigns.
ArrayPrintStyle array_print_style = PRINT_STYLE_CURLY;
std::string vector_separator = ", ";
std::string vector_print_before = "{";
std::string vector_print_after = "}";
std::string array_opening_bracket = "{";
std::string array_closing_bracket = "}";
std::string array_contiguous_separator = ", ";
std::string array_non_contiguous_separator = ",\n";
std::string array_print_before = "\n{";
std::string array_print_after = "}";
std::string array_print_empty_before = "(empty rank-";
std::string array_print_empty_after = " array)";
bool array_print_indent = true;
bool array_print_empty_rank = true;
}

// Select one of the predefined array print styles. Every
// separator/bracket string and flag in adept::internal is overwritten
// with the values belonging to style "ps", and "ps" is then recorded
// in internal::array_print_style. An unrecognized value of "ps"
// throws invalid_operation from the "default" case, i.e. before any
// of the variables (including array_print_style) has been modified.
void set_array_print_style(ArrayPrintStyle ps) {
  using namespace internal;
  switch (ps) {
  case PRINT_STYLE_PLAIN:
    // Space-separated values, no brackets
    vector_separator = " ";
    vector_print_before = "";
    vector_print_after = "";
    array_opening_bracket = "";
    array_closing_bracket = "";
    array_contiguous_separator = " ";
    array_non_contiguous_separator = "\n";
    array_print_before = "";
    array_print_after = "";
    array_print_empty_before = "(empty rank-";
    array_print_empty_after = " array)";
    array_print_indent = false;
    array_print_empty_rank = true;
    break;
  case PRINT_STYLE_CSV:
    // Comma-separated values, one line per non-contiguous slice
    vector_separator = ", ";
    vector_print_before = "";
    vector_print_after = "";
    array_opening_bracket = "";
    array_closing_bracket = "";
    array_contiguous_separator = ", ";
    array_non_contiguous_separator = "\n";
    array_print_before = "";
    array_print_after = "";
    array_print_empty_before = "empty";
    array_print_empty_after = "";
    array_print_indent = false;
    array_print_empty_rank = false;
    break;
  case PRINT_STYLE_MATLAB:
    // Square brackets with ";" between non-contiguous slices
    vector_separator = " ";
    vector_print_before = "[";
    vector_print_after = "]";
    array_opening_bracket = "[";
    array_closing_bracket = "]";
    array_contiguous_separator = " ";
    array_non_contiguous_separator = ";\n";
    array_print_before = "[";
    array_print_after = "]";
    array_print_empty_before = "[";
    array_print_empty_after = "]";
    array_print_indent = true;
    array_print_empty_rank = false;
    break;
  case PRINT_STYLE_CURLY:
    // Curly brackets with commas (the start-up configuration)
    vector_separator = ", ";
    vector_print_before = "{";
    vector_print_after = "}";
    array_opening_bracket = "{";
    array_closing_bracket = "}";
    array_contiguous_separator = ", ";
    array_non_contiguous_separator = ",\n";
    array_print_before = "\n{";
    array_print_after = "}";
    array_print_empty_before = "(empty rank-";
    array_print_empty_after = " array)";
    array_print_indent = true;
    array_print_empty_rank = true;
    break;
  default:
    throw invalid_operation("Array print style not understood");
  }
  array_print_style = ps;
}
}

================================================
FILE: adept/Makefile.am
================================================
lib_LTLIBRARIES = libadept.la
libadept_la_SOURCES = Array.cpp Stack.cpp StackStorageOrig.cpp \
	jacobian.cpp Storage.cpp index.cpp settings.cpp \
	cppblas.cpp cpplapack.h solve.cpp inv.cpp \
	vector_utilities.cpp Minimizer.cpp \
	minimize_limited_memory_bfgs.cpp minimize_levenberg_marquardt.cpp \
	minimize_conjugate_gradient.cpp line_search.cpp
libadept_la_CPPFLAGS = -I@top_srcdir@/include

================================================
FILE: adept/Minimizer.cpp
================================================
/* Minimizer.cpp -- class for minimizing the cost function of an optimizable object

    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
*/ #include #include #include namespace adept { // List of the names of available minimizer algorithms static const char* minimizer_algorithm_names_[] = {"L-BFGS", "Conjugate-Gradient", "Conjugate-Gradient-FR", "Levenberg", "Levenberg-Marquardt"}; // Lower-case versions of the list above static const char* minimizer_algorithm_lower_names_[] = {"l-bfgs", "conjugate-gradient", "conjugate-gradient-fr", "levenberg", "levenberg-marquardt"}; // Convert to lower case, and convert spaces and underscores to // hyphens. This function is used to do a case-insensitive // string-based selection of the minimizer algorithm to use. static void to_lower_in_place(std::string& str) { for (std::string::size_type istr = 0; istr < str.size(); ++istr) { str[istr] = std::tolower(str[istr]); if (str[istr] == ' ' || str[istr] == '_') { str[istr] = '-'; } } } // Return a C string describing the minimizer status const char* minimizer_status_string(MinimizerStatus status) { switch (status) { case MINIMIZER_STATUS_SUCCESS: return "Converged"; break; case MINIMIZER_STATUS_EMPTY_STATE: return "Empty state vector, no minimization performed"; break; case MINIMIZER_STATUS_MAX_ITERATIONS_REACHED: return "Maximum iterations reached"; break; case MINIMIZER_STATUS_FAILED_TO_CONVERGE: return "Failed to converge"; break; case MINIMIZER_STATUS_DIRECTION_UPHILL: return "Search direction points uphill"; break; case MINIMIZER_STATUS_BOUND_REACHED: return "Bound reached"; // Should not be returned from a minimize function break; case MINIMIZER_STATUS_INVALID_COST_FUNCTION: return "Non-finite cost function"; break; case MINIMIZER_STATUS_INVALID_GRADIENT: return "Non-finite gradient"; break; case MINIMIZER_STATUS_INVALID_BOUNDS: return "Invalid bounds for bounded minimization"; break; case MINIMIZER_STATUS_NOT_YET_CONVERGED: return "Minimization still in progress"; break; default: return "Status unrecognized"; } } // Case-insensitive setting of the miminization algorithm given its // name void 
Minimizer::set_algorithm(const std::string& algo) { std::string algo_lower = algo; to_lower_in_place(algo_lower); std::cout << "Checking \"" << algo_lower << "\"\n"; for (int ialgo = 0; ialgo < static_cast(MINIMIZER_ALGORITHM_NUMBER_AVAILABLE); ++ialgo) { if (algo_lower == minimizer_algorithm_lower_names_[ialgo]) { set_algorithm(static_cast(ialgo)); return; } } throw optimization_exception("Algorithm name not understood"); } std::string Minimizer::algorithm_name() { int ialgo = static_cast(algorithm_); if (ialgo >= 0 && ialgo < MINIMIZER_ALGORITHM_NUMBER_AVAILABLE) { return minimizer_algorithm_names_[ialgo]; } else { return "Unknown"; } } // Unconstrained minimization MinimizerStatus Minimizer::minimize(Optimizable& optimizable, Vector x) { if (minimizer_algorithm_order(algorithm_) > 1 && !optimizable.provides_derivative(2)) { throw optimization_exception("2nd-order minimization algorithm requires optimizable that can provide 2nd derivatives"); } else if (algorithm_ == MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS) { return minimize_limited_memory_bfgs(optimizable, x); } else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT) { return minimize_conjugate_gradient(optimizable, x); } else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR) { return minimize_conjugate_gradient(optimizable, x, true); } else if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG) { return minimize_levenberg_marquardt(optimizable, x, true); } else if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT) { return minimize_levenberg_marquardt(optimizable, x, false); } else { throw optimization_exception("Minimization algorithm not recognized"); } } // Constrained minimization MinimizerStatus Minimizer::minimize(Optimizable& optimizable, Vector x, const Vector& x_lower, const Vector& x_upper) { if (minimizer_algorithm_order(algorithm_) > 1 && !optimizable.provides_derivative(2)) { throw optimization_exception("2nd-order minimization algorithm requires optimizable that can provide 2nd 
derivatives"); } if (algorithm_ == MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS) { return minimize_limited_memory_bfgs_bounded(optimizable, x, x_lower, x_upper); } else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT) { return minimize_conjugate_gradient_bounded(optimizable, x, x_lower, x_upper); } else if (algorithm_ == MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR) { return minimize_conjugate_gradient_bounded(optimizable, x, x_lower, x_upper, true); } if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG) { return minimize_levenberg_marquardt_bounded(optimizable, x, x_lower, x_upper, true); } if (algorithm_ == MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT) { return minimize_levenberg_marquardt_bounded(optimizable, x, x_lower, x_upper, false); } else { throw optimization_exception("Constrained minimization algorithm not recognized"); } } }; ================================================ FILE: adept/Stack.cpp ================================================ /* Stack.cpp -- Stack for storing automatic differentiation information Copyright (C) 2012-2014 University of Reading Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include #include // For memcpy #ifdef _OPENMP #include #endif #include namespace adept { using namespace internal; // Global pointers to the current thread, the second of which is // thread safe. The first is only used if ADEPT_STACK_THREAD_UNSAFE // is defined. 
ADEPT_THREAD_LOCAL Stack* _stack_current_thread = 0; Stack* _stack_current_thread_unsafe = 0; // MEMBER FUNCTIONS OF THE STACK CLASS // Destructor: frees dynamically allocated memory (if any) Stack::~Stack() { // If this is the currently active stack then set to NULL as // "this" is shortly to become invalid if (is_thread_unsafe_) { if (_stack_current_thread_unsafe == this) { _stack_current_thread_unsafe = 0; } } else if (_stack_current_thread == this) { _stack_current_thread = 0; } #ifndef ADEPT_STACK_STORAGE_STL if (gradient_) { delete[] gradient_; } #endif } // Make this stack "active" by copying its "this" pointer to a // global variable; this makes it the stack that aReal objects // subsequently interact with when being created and participating // in mathematical expressions void Stack::activate() { // Check that we don't already have an active stack in this thread if ((is_thread_unsafe_ && _stack_current_thread_unsafe && _stack_current_thread_unsafe != this) || ((!is_thread_unsafe_) && _stack_current_thread && _stack_current_thread != this)) { throw(stack_already_active()); } else { if (!is_thread_unsafe_) { _stack_current_thread = this; } else { _stack_current_thread_unsafe = this; } } } // Set the maximum number of threads to be used in Jacobian // calculations, if possible. A value of 1 indicates that OpenMP // will not be used, while a value of 0 indicates that the number // will match the number of available processors. Returns the // maximum that will be used, which will be 1 if the Adept library // was compiled without OpenMP support. Note that a value of 1 will // disable the use of OpenMP with Adept, so Adept will then use no // OpenMP directives or function calls. Note that if in your program // you use OpenMP with each thread performing automatic // differentiaion with its own independent Adept stack, then // typically only one OpenMP thread is available for each Jacobian // calculation, regardless of whether you call this function. 
int Stack::set_max_jacobian_threads(int n) { #ifdef _OPENMP if (have_openmp_) { if (n == 1) { openmp_manually_disabled_ = true; return 1; } else if (n < 1) { openmp_manually_disabled_ = false; omp_set_num_threads(omp_get_num_procs()); return omp_get_max_threads(); } else { openmp_manually_disabled_ = false; omp_set_num_threads(n); return omp_get_max_threads(); } } #endif return 1; } // Return maximum number of OpenMP threads to be used in Jacobian // calculation int Stack::max_jacobian_threads() const { #ifdef _OPENMP if (have_openmp_) { if (openmp_manually_disabled_) { return 1; } else { return omp_get_max_threads(); } } #endif return 1; } // Perform to adjoint computation (reverse mode). It is assumed that // some gradients have been assigned already, otherwise the function // returns with an error. void Stack::compute_adjoint() { if (gradients_are_initialized()) { // Loop backwards through the derivative statements for (uIndex ist = n_statements_-1; ist > 0; ist--) { const Statement& statement = statement_[ist]; // We copy the RHS gradient (LHS in the original derivative // statement but swapped in the adjoint equivalent) to "a" in // case it appears on the LHS in any of the following statements Real a = gradient_[statement.index]; gradient_[statement.index] = 0.0; // By only looping if a is non-zero we gain a significant speed-up if (a != 0.0) { // Loop over operations for (uIndex i = statement_[ist-1].end_plus_one; i < statement.end_plus_one; i++) { gradient_[index_[i]] += multiplier_[i]*a; } } } } else { throw(gradients_not_initialized()); } } // Perform tangent linear computation (forward mode). It is assumed // that some gradients have been assigned already, otherwise the // function returns with an error. 
void Stack::compute_tangent_linear() { if (gradients_are_initialized()) { // Loop forward through the statements for (uIndex ist = 1; ist < n_statements_; ist++) { const Statement& statement = statement_[ist]; // We copy the LHS to "a" in case it appears on the RHS in any // of the following statements Real a = 0.0; for (uIndex i = statement_[ist-1].end_plus_one; i < statement.end_plus_one; i++) { a += multiplier_[i]*gradient_[index_[i]]; } gradient_[statement.index] = a; } } else { throw(gradients_not_initialized()); } } // Register n gradients uIndex Stack::do_register_gradients(const uIndex& n) { n_gradients_registered_ += n; if (!gap_list_.empty()) { uIndex return_val; // Insert in a gap, if there is one big enough for (GapListIterator it = gap_list_.begin(); it != gap_list_.end(); it++) { uIndex len = it->end + 1 - it->start; if (len > n) { // Gap a bit larger than needed: reduce its size return_val = it->start; it->start += n; return return_val; } else if (len == n) { // Gap exactly the size needed: fill it and remove from list return_val = it->start; if (most_recent_gap_ == it) { gap_list_.erase(it); most_recent_gap_ = gap_list_.end(); } else { gap_list_.erase(it); } return return_val; } } } // No suitable gap found; instead add to end of gradient vector i_gradient_ += n; if (i_gradient_ > max_gradient_) { max_gradient_ = i_gradient_; } return i_gradient_ - n; } // If an aReal object is deleted, its gradient_index is // unregistered from the stack. If this is at the top of the stack // then this is easy and is done inline; this is the usual case // since C++ trys to deallocate automatic objects in the reverse // order to that in which they were allocated. If it is not at the // top of the stack then a non-inline function is called to ensure // that the gap list is adjusted correctly. 
void Stack::unregister_gradient_not_top(const uIndex& gradient_index) {
  // Records how (if at all) the index has been absorbed into the gap
  // list, which decides which neighbouring gap to test for a merge
  enum {
    ADDED_AT_BASE, ADDED_AT_TOP, NEW_GAP, NOT_FOUND
  } status = NOT_FOUND;
  // First try to find if the unregistered element is at the
  // start or end of an existing gap
  if (!gap_list_.empty() && most_recent_gap_ != gap_list_.end()) {
    // We have a "most recent" gap - check whether the gradient
    // to be unregistered is here
    Gap& current_gap = *most_recent_gap_;
    if (gradient_index == current_gap.start - 1) {
      current_gap.start--;
      status = ADDED_AT_BASE;
    }
    else if (gradient_index == current_gap.end + 1) {
      current_gap.end++;
      status = ADDED_AT_TOP;
    }
    // Should we check for erroneous removal from middle of gap?
  }
  if (status == NOT_FOUND) {
    // Search other gaps
    for (GapListIterator it = gap_list_.begin();
         it != gap_list_.end(); it++) {
      if (gradient_index <= it->end + 1) {
        // Gradient to unregister is either within the gap
        // referenced by iterator "it", or it is between "it"
        // and the previous gap in the list
        if (gradient_index == it->start - 1) {
          status = ADDED_AT_BASE;
          it->start--;
          most_recent_gap_ = it;
        }
        else if (gradient_index == it->end + 1) {
          status = ADDED_AT_TOP;
          it->end++;
          most_recent_gap_ = it;
        }
        else {
          // Insert a new gap of width 1; note that list::insert
          // inserts *before* the specified location
          most_recent_gap_ = gap_list_.insert(it, Gap(gradient_index));
          status = NEW_GAP;
        }
        break;
      }
    }
    if (status == NOT_FOUND) {
      // Index lies beyond every existing gap: append a new gap and
      // make most_recent_gap_ refer to it
      gap_list_.push_back(Gap(gradient_index));
      most_recent_gap_ = gap_list_.end();
      most_recent_gap_--;
    }
  }
  // Finally check if gaps have merged
  if (status == ADDED_AT_BASE
      && most_recent_gap_ != gap_list_.begin()) {
    // The gap grew downwards: check whether it has merged with the
    // previous one in the list
    GapListIterator it = most_recent_gap_;
    it--;
    if (it->end == most_recent_gap_->start - 1) {
      // Merge two gaps
      most_recent_gap_->start = it->start;
      gap_list_.erase(it);
    }
  }
  else if (status == ADDED_AT_TOP) {
    // The gap grew upwards: check whether it has merged with the
    // next one in the list
    GapListIterator it = most_recent_gap_;
    it++;
    if (it != gap_list_.end()
        && it->start == most_recent_gap_->end + 1) {
      // Merge two gaps
      most_recent_gap_->end = it->end;
      gap_list_.erase(it);
    }
  }
}

// Unregister n gradients starting at gradient_index. If they are at
// the top of the stack then i_gradient_ is simply lowered (absorbing
// the last gap if it now touches the top); otherwise the gap list is
// updated in the same way as in unregister_gradient_not_top but for
// a block of n indices.
void Stack::unregister_gradients(const uIndex& gradient_index,
                                 const uIndex& n) {
  n_gradients_registered_ -= n;
  if (gradient_index+n == i_gradient_) {
    // Gradient to be unregistered is at the top of the stack
    i_gradient_ -= n;
    if (!gap_list_.empty()) {
      Gap& last_gap = gap_list_.back();
      if (i_gradient_ == last_gap.end+1) {
        // We have unregistered the elements between the "gap" of
        // unregistered element and the top of the stack, so can set
        // the variables indicating the presence of the gap to zero
        i_gradient_ = last_gap.start;
        GapListIterator it = gap_list_.end();
        it--;
        // Do not leave most_recent_gap_ referring to the element that
        // is about to be removed
        if (most_recent_gap_ == it) {
          most_recent_gap_ = gap_list_.end();
        }
        gap_list_.pop_back();
      }
    }
  }
  else { // Gradients to be unregistered not at top of stack.
    enum {
      ADDED_AT_BASE, ADDED_AT_TOP, NEW_GAP, NOT_FOUND
    } status = NOT_FOUND;
    // First try to find if the unregistered element is at the start
    // or end of an existing gap
    if (!gap_list_.empty() && most_recent_gap_ != gap_list_.end()) {
      // We have a "most recent" gap - check whether the gradient
      // to be unregistered is here
      Gap& current_gap = *most_recent_gap_;
      if (gradient_index == current_gap.start - n) {
        current_gap.start -= n;
        status = ADDED_AT_BASE;
      }
      else if (gradient_index == current_gap.end + 1) {
        current_gap.end += n;
        status = ADDED_AT_TOP;
      }
      /*
      else if (gradient_index > current_gap.start - n
               && gradient_index < current_gap.end + 1) {
        std::cout << "** Attempt to find " << gradient_index << " in gaps ";
        print_gaps();
        std::cout << "\n";
        throw invalid_operation("Gap list corruption");
      }
      */
      // Should we check for erroneous removal from middle of gap?
    }
    if (status == NOT_FOUND) {
      // Search other gaps
      for (GapListIterator it = gap_list_.begin();
           it != gap_list_.end(); it++) {
        if (gradient_index <= it->end + 1) {
          // Gradient to unregister is either within the gap
          // referenced by iterator "it", or it is between "it" and
          // the previous gap in the list
          if (gradient_index == it->start - n) {
            status = ADDED_AT_BASE;
            it->start -= n;
            most_recent_gap_ = it;
          }
          else if (gradient_index == it->end + 1) {
            status = ADDED_AT_TOP;
            it->end += n;
            most_recent_gap_ = it;
          }
          /*
          else if (gradient_index > it->start - n) {
            std::cout << "*** Attempt to find " << gradient_index << " in gaps ";
            print_gaps();
            std::cout << "\n";
            throw invalid_operation("Gap list corruption");
          }
          */
          else {
            // Insert a new gap; note that list::insert inserts
            // *before* the specified location
            most_recent_gap_ = gap_list_.insert(it, Gap(gradient_index, gradient_index+n-1));
            status = NEW_GAP;
          }
          break;
        }
      }
      if (status == NOT_FOUND) {
        // Block lies beyond every existing gap: append a new gap and
        // make most_recent_gap_ refer to it
        gap_list_.push_back(Gap(gradient_index, gradient_index+n-1));
        most_recent_gap_ = gap_list_.end();
        most_recent_gap_--;
      }
    }
    // Finally check if gaps have merged
    if (status == ADDED_AT_BASE
        && most_recent_gap_ != gap_list_.begin()) {
      // The gap grew downwards: check whether it has merged with the
      // previous one in the list
      GapListIterator it = most_recent_gap_;
      it--;
      if (it->end == most_recent_gap_->start - 1) {
        // Merge two gaps
        most_recent_gap_->start = it->start;
        gap_list_.erase(it);
      }
    }
    else if (status == ADDED_AT_TOP) {
      // The gap grew upwards: check whether it has merged with the
      // next one in the list
      GapListIterator it = most_recent_gap_;
      it++;
      if (it != gap_list_.end()
          && it->start == most_recent_gap_->end + 1) {
        // Merge two gaps
        most_recent_gap_->end = it->end;
        gap_list_.erase(it);
      }
    }
  }
}

// Print each derivative statement to the specified stream (standard
// output if omitted). Statement 0 is not printed; a statement with
// no operations is shown as "= 0".
void Stack::print_statements(std::ostream& os) const {
  for (uIndex ist = 1; ist < n_statements_; ist++) {
    const Statement& statement = statement_[ist];
    os << ist << ": d[" << statement.index << "] = ";
    // The operations of statement "ist" run from the end of the
    // previous statement up to (not including) end_plus_one
    if (statement_[ist-1].end_plus_one == statement_[ist].end_plus_one) {
      os << "0\n";
    }
    else {
      for (uIndex i = statement_[ist-1].end_plus_one;
           i < statement.end_plus_one; i++) {
        os << " + " << multiplier_[i] << "*d[" << index_[i] << "]";
      }
      os << "\n";
    }
  }
}

// Print the current gradient list to the specified stream (standard
// output if omitted), ten values per line with each line prefixed by
// the index of its first value. Returns false (after printing a
// message) if the gradients have not been initialized.
bool Stack::print_gradients(std::ostream& os) const {
  if (gradients_are_initialized()) {
    for (uIndex i = 0; i < max_gradient_; i++) {
      if (i%10 == 0) {
        if (i != 0) {
          os << "\n";
        }
        os << i << ":";
      }
      os << " " << gradient_[i];
    }
    os << "\n";
    return true;
  }
  else {
    os << "No gradients initialized\n";
    return false;
  }
}

// Print the list of gaps in the gradient list to the specified
// stream (standard output if omitted), as space-separated
// "start-end" pairs without a trailing newline
void Stack::print_gaps(std::ostream& os) const {
  // NOTE(review): the template argument of std::list is missing in
  // this copy of the file; restore it from the repository.
  for (std::list::const_iterator it = gap_list_.begin();
       it != gap_list_.end(); it++) {
    os << it->start << "-" << it->end << " ";
  }
}

#ifndef ADEPT_STACK_STORAGE_STL
// Initialize the vector of gradients ready for the adjoint
// calculation: the array is reallocated only if it is too small to
// hold max_gradient_ values, then its first max_gradient_ elements
// are set to zero
void Stack::initialize_gradients() {
  if (max_gradient_ > 0) {
    if (n_allocated_gradients_ < max_gradient_) {
      if (gradient_) {
        delete[] gradient_;
      }
      gradient_ = new Real[max_gradient_];
      n_allocated_gradients_ = max_gradient_;
    }
    for (uIndex i = 0; i < max_gradient_; i++) {
      gradient_[i] = 0.0;
    }
  }
  gradients_initialized_ = true;
}
#else
// Alternative version used when ADEPT_STACK_STORAGE_STL is defined,
// in which case gradient_ is a container with a resize member
void Stack::initialize_gradients() {
  gradient_.resize(max_gradient_+10, 0.0);
  gradients_initialized_ = true;
}
#endif

// Report information about the stack to the specified stream, or
// standard output if omitted; note that this is synonymous with
// sending the Stack object to a stream using the "<<" operator.
void Stack::print_status(std::ostream& os) const {
  // Writes a multi-line, human-readable summary of this stack to
  // "os": attachment state, recording state, statement/operation
  // counts, gradient registration and gaps, gradient allocation,
  // Jacobian dimensions and the availability of OpenMP.
  os << "Automatic Differentiation Stack (address " << this << "):\n";
  // The stack is reported as attached only if the "current stack"
  // pointer matching its thread-safety mode points to this object
  if ((!is_thread_unsafe_) && _stack_current_thread == this) {
    os << " Currently attached - thread safe\n";
  }
  else if (is_thread_unsafe_ && _stack_current_thread_unsafe == this) {
    os << " Currently attached - thread unsafe\n";
  }
  else {
    os << " Currently detached\n";
  }
  os << " Recording status:\n";
  if (is_recording_) {
    os << " Recording is ON\n";
  }
  else {
    os << " Recording is PAUSED\n";
  }
  // Account for the null statement at the start by subtracting one
  os << " " << n_statements()-1 << " statements ("
     << n_allocated_statements() << " allocated)";
  os << " and " << n_operations() << " operations ("
     << n_allocated_operations() << " allocated)\n";
  os << " " << n_gradients_registered() << " gradients currently registered ";
  os << "and a total of " << max_gradients() << " needed (current index "
     << i_gradient() << ")\n";
  if (gap_list_.empty()) {
    os << " Gradient list has no gaps\n";
  }
  else {
    os << " Gradient list has " << gap_list_.size() << " gaps (";
    print_gaps(os);
    os << ")\n";
  }
  os << " Computation status:\n";
  if (gradients_are_initialized()) {
    os << " " << max_gradients() << " gradients assigned ("
       << n_allocated_gradients() << " allocated)\n";
  }
  else {
    os << " 0 gradients assigned (" << n_allocated_gradients()
       << " allocated)\n";
  }
  os << " Jacobian size: " << n_dependents() << "x" << n_independents() << "\n";
  // Only list the individual indices when both dimensions are small
  if (n_dependents() <= 10 && n_independents() <= 10) {
    os << " Independent indices:";
    for (std::size_t i = 0; i < independent_index_.size(); ++i) {
      os << " " << independent_index_[i];
    }
    os << "\n Dependent indices: ";
    for (std::size_t i = 0; i < dependent_index_.size(); ++i) {
      os << " " << dependent_index_[i];
    }
    os << "\n";
  }
  // Without OpenMP only the final "not available" message is
  // compiled; with OpenMP it becomes the else-branch of the
  // have_openmp_ test
#ifdef _OPENMP
  if (have_openmp_) {
    if (openmp_manually_disabled_) {
      os << " Parallel Jacobian calculation manually disabled\n";
    }
    else {
      os << " Parallel Jacobian calculation can use up to "
         << omp_get_max_threads() << " threads\n";
      os << " Each thread treats " << ADEPT_MULTIPASS_SIZE
         << " (in)dependent variables\n";
    }
  }
  else {
#endif
    os << " Parallel Jacobian calculation not available\n";
#ifdef _OPENMP
  }
#endif
}

} // End namespace adept

================================================
FILE: adept/StackStorageOrig.cpp
================================================
/* StackStorageOrig.cpp -- Original storage of stacks using STL containers

    Copyright (C) 2014-2015 University of Reading

    Author: Robin Hogan

    This file is part of the Adept library.

   The Stack class inherits from a class providing the storage (and
   interface to the storage) for the derivative statements that are
   accumulated during the execution of an algorithm.  The derivative
   statements are held in two stacks described by Hogan (2014): the
   "statement stack" and the "operation stack".

   This file provides one of the original storage engines, which used
   std::vector to hold the two stacks. Note that these stacks are
   contiguous in memory, which is not ideal for very large
   algorithms.

   NOTE(review): the functions below manage the stacks with
   new[]/delete[] rather than std::vector, so the description above
   looks out of date -- confirm.
*/

// NOTE(review): the targets of the two include directives below are
// missing from this copy of the file (they appear to have been lost
// in extraction); std::memcpy is used further down -- restore the
// header names from the repository.
#include #include

namespace adept {
namespace internal {

// Release the statement, multiplier and index arrays if they have
// been allocated
StackStorageOrig::~StackStorageOrig() {
  if (statement_) {
    delete[] statement_;
  }
  if (multiplier_) {
    delete[] multiplier_;
  }
  if (index_) {
    delete[] index_;
  }
}

// Double the size of the operation stack, or grow it even more if
// the requested minimum number of extra entries (min) is greater
// than this would allow.  The first n_operations_ entries of the
// multiplier and index arrays are copied to the new arrays, the old
// arrays are deleted and n_allocated_operations_ is updated.
void StackStorageOrig::grow_operation_stack(uIndex min) {
  uIndex new_size = 2*n_allocated_operations_;
  if (min > 0 && new_size < n_allocated_operations_+min) {
    // Doubling does not provide "min" extra entries, so add "min" on
    // top of the doubled size
    new_size += min;
  }
  Real* new_multiplier = new Real[new_size];
  uIndex* new_index = new uIndex[new_size];
  std::memcpy(new_multiplier, multiplier_, n_operations_*sizeof(Real));
  std::memcpy(new_index, index_, n_operations_*sizeof(uIndex));
  delete[] multiplier_;
  delete[] index_;
  multiplier_ = new_multiplier;
  index_ = new_index;
  n_allocated_operations_ = new_size;
}

// ...
likewise for the statement stack void StackStorageOrig::grow_statement_stack(uIndex min) { uIndex new_size = 2*n_allocated_statements_; if (min > 0 && new_size < n_allocated_statements_+min) { new_size += min; } Statement* new_statement = new Statement[new_size]; std::memcpy(new_statement, statement_, n_statements_*sizeof(Statement)); delete[] statement_; statement_ = new_statement; n_allocated_statements_ = new_size; } } } ================================================ FILE: adept/Storage.cpp ================================================ /* Storage.cpp -- Global variables recording use of Storage objects Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include namespace adept { namespace internal { Index n_storage_objects_created_; Index n_storage_objects_deleted_; } } ================================================ FILE: adept/cppblas.cpp ================================================ /* cppblas.cpp -- C++ interface to BLAS functions Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
This file provides a C++ interface to selected Level-2 and -3 BLAS functions in which the precision of the arguments (float versus double) is inferred via overloading */ #include #include #ifdef HAVE_CONFIG_H #include "config.h" #endif #ifdef HAVE_BLAS extern "C" { void sgemm_(const char* TransA, const char* TransB, const int* M, const int* N, const int* K, const float* alpha, const float* A, const int* lda, const float* B, const int* ldb, const float* beta, const float* C, const int* ldc); void dgemm_(const char* TransA, const char* TransB, const int* M, const int* N, const int* K, const double* alpha, const double* A, const int* lda, const double* B, const int* ldb, const double* beta, const double* C, const int* ldc); void sgemv_(const char* TransA, const int* M, const int* N, const float* alpha, const float* A, const int* lda, const float* X, const int* incX, const float* beta, const float* Y, const int* incY); void dgemv_(const char* TransA, const int* M, const int* N, const double* alpha, const double* A, const int* lda, const double* X, const int* incX, const double* beta, const double* Y, const int* incY); void ssymm_(const char* side, const char* uplo, const int* M, const int* N, const float* alpha, const float* A, const int* lda, const float* B, const int* ldb, const float* beta, float* C, const int* ldc); void dsymm_(const char* side, const char* uplo, const int* M, const int* N, const double* alpha, const double* A, const int* lda, const double* B, const int* ldb, const double* beta, double* C, const int* ldc); void ssymv_(const char* uplo, const int* N, const float* alpha, const float* A, const int* lda, const float* X, const int* incX, const float* beta, const float* Y, const int* incY); void dsymv_(const char* uplo, const int* N, const double* alpha, const double* A, const int* lda, const double* X, const int* incX, const double* beta, const double* Y, const int* incY); void sgbmv_(const char* TransA, const int* M, const int* N, const int* kl, const 
int* ku, const float* alpha, const float* A, const int* lda, const float* X, const int* incX, const float* beta, const float* Y, const int* incY); void dgbmv_(const char* TransA, const int* M, const int* N, const int* kl, const int* ku, const double* alpha, const double* A, const int* lda, const double* X, const int* incX, const double* beta, const double* Y, const int* incY); } namespace adept { namespace internal { // Matrix-matrix multiplication for general dense matrices #define ADEPT_DEFINE_GEMM(T, FUNC, FUNC_COMPLEX) \ void cppblas_gemm(BLAS_ORDER Order, \ BLAS_TRANSPOSE TransA, \ BLAS_TRANSPOSE TransB, \ int M, int N, \ int K, T alpha, const T *A, \ int lda, const T *B, int ldb, \ T beta, T *C, int ldc) { \ if (Order == BlasColMajor) { \ FUNC(&TransA, &TransB, &M, &N, &K, &alpha, A, &lda, \ B, &ldb, &beta, C, &ldc); \ } \ else { \ FUNC(&TransB, &TransA, &N, &M, &K, &alpha, B, &ldb, \ A, &lda, &beta, C, &ldc); \ } \ } ADEPT_DEFINE_GEMM(double, dgemm_, zgemm_) ADEPT_DEFINE_GEMM(float, sgemm_, cgemm_) #undef ADEPT_DEFINE_GEMM // Matrix-vector multiplication for a general dense matrix #define ADEPT_DEFINE_GEMV(T, FUNC, FUNC_COMPLEX) \ void cppblas_gemv(const BLAS_ORDER Order, \ const BLAS_TRANSPOSE TransA, \ const int M, const int N, \ const T alpha, const T *A, const int lda, \ const T *X, const int incX, const T beta, \ T *Y, const int incY) { \ if (Order == BlasColMajor) { \ FUNC(&TransA, &M, &N, &alpha, A, &lda, X, &incX, \ &beta, Y, &incY); \ } \ else { \ BLAS_TRANSPOSE TransNew \ = TransA == BlasTrans ? BlasNoTrans : BlasTrans; \ FUNC(&TransNew, &N, &M, &alpha, A, &lda, X, &incX, \ &beta, Y, &incY); \ } \ } ADEPT_DEFINE_GEMV(double, dgemv_, zgemv_) ADEPT_DEFINE_GEMV(float, sgemv_, cgemv_) #undef ADEPT_DEFINE_GEMV // Matrix-matrix multiplication where matrix A is symmetric // FIX! 
// CHECK ROW MAJOR VERSION IS RIGHT
// NOTE(review): in the non-column-major branch Side and Uplo are
// inverted and M and N exchanged, while A and B keep their
// positions.  This looks consistent with forming the transposed
// product C^T = B^T A for symmetric A, but confirm with a test.
// The macro defines one cppblas_symm overload for element type T
// that calls the Fortran routine FUNC (FUNC_COMPLEX is not used).
#define ADEPT_DEFINE_SYMM(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_symm(const BLAS_ORDER Order,                             \
                    const BLAS_SIDE Side,                               \
                    const BLAS_UPLO Uplo,                               \
                    const int M, const int N,                           \
                    const T alpha, const T *A, const int lda,           \
                    const T *B, const int ldb, const T beta,            \
                    T *C, const int ldc) {                              \
    if (Order == BlasColMajor) {                                        \
      FUNC(&Side, &Uplo, &M, &N, &alpha, A, &lda,                       \
           B, &ldb, &beta, C, &ldc);                                    \
    }                                                                   \
    else {                                                              \
      BLAS_SIDE SideNew = Side == BlasLeft ? BlasRight : BlasLeft;      \
      BLAS_UPLO UploNew = Uplo == BlasUpper ? BlasLower : BlasUpper;    \
      FUNC(&SideNew, &UploNew, &N, &M, &alpha, A, &lda,                 \
           B, &ldb, &beta, C, &ldc);                                    \
    }                                                                   \
  }
ADEPT_DEFINE_SYMM(double, dsymm_, zsymm_)
ADEPT_DEFINE_SYMM(float, ssymm_, csymm_)
#undef ADEPT_DEFINE_SYMM

// Matrix-vector multiplication where the matrix is symmetric.  For
// any order other than column-major only the Uplo flag is inverted.
#define ADEPT_DEFINE_SYMV(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_symv(const BLAS_ORDER Order,                             \
                    const BLAS_UPLO Uplo,                               \
                    const int N, const T alpha, const T *A,             \
                    const int lda, const T *X, const int incX,          \
                    const T beta, T *Y, const int incY) {               \
    if (Order == BlasColMajor) {                                        \
      FUNC(&Uplo, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);      \
    }                                                                   \
    else {                                                              \
      BLAS_UPLO UploNew = Uplo == BlasUpper ? BlasLower : BlasUpper;    \
      FUNC(&UploNew, &N, &alpha, A, &lda, X, &incX, &beta, Y, &incY);   \
    }                                                                   \
  }
ADEPT_DEFINE_SYMV(double, dsymv_, zsymv_)
ADEPT_DEFINE_SYMV(float, ssymv_, csymv_)
#undef ADEPT_DEFINE_SYMV

// Matrix-vector multiplication for a general band matrix.  For any
// order other than column-major the transpose flag is inverted, M
// and N are exchanged and so are the band widths KL and KU.
#define ADEPT_DEFINE_GBMV(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_gbmv(const BLAS_ORDER Order,                             \
                    const BLAS_TRANSPOSE TransA,                        \
                    const int M, const int N,                           \
                    const int KL, const int KU, const T alpha,          \
                    const T *A, const int lda, const T *X,              \
                    const int incX, const T beta, T *Y,                 \
                    const int incY) {                                   \
    if (Order == BlasColMajor) {                                        \
      FUNC(&TransA, &M, &N, &KL, &KU, &alpha, A, &lda,                  \
           X, &incX, &beta, Y, &incY);                                  \
    }                                                                   \
    else {                                                              \
      BLAS_TRANSPOSE TransNew                                           \
        = TransA == BlasTrans ? BlasNoTrans : BlasTrans;                \
      FUNC(&TransNew, &N, &M, &KU, &KL, &alpha, A, &lda,                \
           X, &incX, &beta, Y, &incY);                                  \
    }                                                                   \
  }
ADEPT_DEFINE_GBMV(double, dgbmv_, zgbmv_)
ADEPT_DEFINE_GBMV(float, sgbmv_, cgbmv_)
#undef ADEPT_DEFINE_GBMV

} // End namespace internal
} // End namespace adept

#else // Don't have BLAS

// Without BLAS the same overloads are defined, but each one throws
// feature_not_available as soon as it is called

namespace adept {
namespace internal {

// Matrix-matrix multiplication for general dense matrices
#define ADEPT_DEFINE_GEMM(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_gemm(BLAS_ORDER Order,                                   \
                    BLAS_TRANSPOSE TransA,                              \
                    BLAS_TRANSPOSE TransB,                              \
                    int M, int N,                                       \
                    int K, T alpha, const T *A,                         \
                    int lda, const T *B, int ldb,                       \
                    T beta, T *C, int ldc) {                            \
    throw feature_not_available("Cannot perform matrix-matrix multiplication because compiled without BLAS"); \
  }
ADEPT_DEFINE_GEMM(double, dgemm_, zgemm_)
ADEPT_DEFINE_GEMM(float, sgemm_, cgemm_)
#undef ADEPT_DEFINE_GEMM

// Matrix-vector multiplication for a general dense matrix
#define ADEPT_DEFINE_GEMV(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_gemv(const BLAS_ORDER Order,                             \
                    const BLAS_TRANSPOSE TransA,                        \
                    const int M, const int N,                           \
                    const T alpha, const T *A, const int lda,           \
                    const T *X, const int incX, const T beta,           \
                    T *Y, const int incY) {                             \
    throw feature_not_available("Cannot perform matrix-vector multiplication because compiled without BLAS"); \
  }
ADEPT_DEFINE_GEMV(double, dgemv_, zgemv_)
ADEPT_DEFINE_GEMV(float, sgemv_, cgemv_)
#undef ADEPT_DEFINE_GEMV

// Matrix-matrix multiplication where matrix A is symmetric
// FIX!
// CHECK ROW MAJOR VERSION IS RIGHT
// (This is the branch compiled without BLAS: each overload below
// only throws feature_not_available.)
#define ADEPT_DEFINE_SYMM(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_symm(const BLAS_ORDER Order,                             \
                    const BLAS_SIDE Side,                               \
                    const BLAS_UPLO Uplo,                               \
                    const int M, const int N,                           \
                    const T alpha, const T *A, const int lda,           \
                    const T *B, const int ldb, const T beta,            \
                    T *C, const int ldc) {                              \
    throw feature_not_available("Cannot perform symmetric matrix-matrix multiplication because compiled without BLAS"); \
  }
ADEPT_DEFINE_SYMM(double, dsymm_, zsymm_)
ADEPT_DEFINE_SYMM(float, ssymm_, csymm_)
#undef ADEPT_DEFINE_SYMM

// Matrix-vector multiplication where the matrix is symmetric
#define ADEPT_DEFINE_SYMV(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_symv(const BLAS_ORDER Order,                             \
                    const BLAS_UPLO Uplo,                               \
                    const int N, const T alpha, const T *A,             \
                    const int lda, const T *X, const int incX,          \
                    const T beta, T *Y, const int incY) {               \
    throw feature_not_available("Cannot perform symmetric matrix-vector multiplication because compiled without BLAS"); \
  }
ADEPT_DEFINE_SYMV(double, dsymv_, zsymv_)
ADEPT_DEFINE_SYMV(float, ssymv_, csymv_)
#undef ADEPT_DEFINE_SYMV

// Matrix-vector multiplication for a general band matrix
#define ADEPT_DEFINE_GBMV(T, FUNC, FUNC_COMPLEX)                        \
  void cppblas_gbmv(const BLAS_ORDER Order,                             \
                    const BLAS_TRANSPOSE TransA,                        \
                    const int M, const int N,                           \
                    const int KL, const int KU, const T alpha,          \
                    const T *A, const int lda, const T *X,              \
                    const int incX, const T beta, T *Y,                 \
                    const int incY) {                                   \
    throw feature_not_available("Cannot perform band matrix-vector multiplication because compiled without BLAS"); \
  }
ADEPT_DEFINE_GBMV(double, dgbmv_, zgbmv_)
ADEPT_DEFINE_GBMV(float, sgbmv_, cgbmv_)
#undef ADEPT_DEFINE_GBMV

}
}

#endif

================================================
FILE: adept/cpplapack.h
================================================
/* cpplapack.h -- C++ interface to LAPACK

    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
*/

#ifndef AdeptCppLapack_H
#define AdeptCppLapack_H 1

// NOTE(review): the targets of the two include directives below are
// missing from this copy of the file, as are the template arguments
// of every static_cast and std::vector further down (they appear to
// have been lost in extraction) -- restore them from the repository.
#include #include

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_LAPACK

extern "C" {
  // External LAPACK Fortran functions; every argument is passed by
  // address, as in the calls made by the wrappers below
  void sgetrf_(const int* m, const int* n, float* a,
               const int* lda, int* ipiv, int* info);
  void dgetrf_(const int* m, const int* n, double* a,
               const int* lda, int* ipiv, int* info);
  void sgetri_(const int* n, float* a, const int* lda,
               const int* ipiv, float* work, const int* lwork,
               int* info);
  void dgetri_(const int* n, double* a, const int* lda,
               const int* ipiv, double* work, const int* lwork,
               int* info);
  void ssytrf_(const char* uplo, const int* n, float* a,
               const int* lda, int* ipiv, float* work,
               const int* lwork, int* info);
  void dsytrf_(const char* uplo, const int* n, double* a,
               const int* lda, int* ipiv, double* work,
               const int* lwork, int* info);
  void ssytri_(const char* uplo, const int* n, float* a,
               const int* lda, const int* ipiv, float* work,
               int* info);
  void dsytri_(const char* uplo, const int* n, double* a,
               const int* lda, const int* ipiv, double* work,
               int* info);
  void ssysv_(const char* uplo, const int* n, const int* nrhs,
              float* a, const int* lda, int* ipiv, float* b,
              const int* ldb, float* work, const int* lwork,
              int* info);
  void dsysv_(const char* uplo, const int* n, const int* nrhs,
              double* a, const int* lda, int* ipiv, double* b,
              const int* ldb, double* work, const int* lwork,
              int* info);
  void sgesv_(const int* n, const int* nrhs, float* a,
              const int* lda, int* ipiv, float* b, const int* ldb,
              int* info);
  void dgesv_(const int* n, const int* nrhs, double* a,
              const int* lda, int* ipiv, double* b, const int* ldb,
              int* info);
}

namespace adept {

// Overloaded functions provide both single &
// double precision versions, and prevents the huge lapacke.h having
// to be included in all user code
namespace internal {

typedef int lapack_int;

// Each wrapper below returns the "info" value written by the LAPACK
// routine it calls

// Factorize a general matrix (n is passed as both dimensions, so the
// matrix is treated as square)
inline
int cpplapack_getrf(int n, float* a, int lda, int* ipiv) {
  int info;
  sgetrf_(&n, &n, a, &lda, ipiv, &info);
  return info;
}
inline
int cpplapack_getrf(int n, double* a, int lda, int* ipiv) {
  int info;
  dgetrf_(&n, &n, a, &lda, ipiv, &info);
  return info;
}

// Invert a general matrix.  The routine is called twice: first with
// lwork = -1, after which the value left in work_query is used as
// the size of the work array for the second call.
inline
int cpplapack_getri(int n, float* a, int lda, const int* ipiv) {
  int info;
  float work_query;
  int lwork = -1;
  // Find out how much work memory required
  sgetri_(&n, a, &lda, ipiv, &work_query, &lwork, &info);
  lwork = static_cast(work_query);
  std::vector work(static_cast(lwork));
  // Do full calculation
  sgetri_(&n, a, &lda, ipiv, &work[0], &lwork, &info);
  return info;
}
inline
int cpplapack_getri(int n, double* a, int lda, const int* ipiv) {
  int info;
  double work_query;
  int lwork = -1;
  // Find out how much work memory required
  dgetri_(&n, a, &lda, ipiv, &work_query, &lwork, &info);
  lwork = static_cast(work_query);
  std::vector work(static_cast(lwork));
  // Do full calculation
  dgetri_(&n, a, &lda, ipiv, &work[0], &lwork, &info);
  return info;
}

// Factorize a symmetric matrix, using the same two-call pattern to
// size the work array
inline
int cpplapack_sytrf(char uplo, int n, float* a, int lda, int* ipiv) {
  int info;
  float work_query;
  int lwork = -1;
  // Find out how much work memory required
  ssytrf_(&uplo, &n, a, &lda, ipiv, &work_query, &lwork, &info);
  lwork = static_cast(work_query);
  std::vector work(static_cast(lwork));
  // Do full calculation
  ssytrf_(&uplo, &n, a, &lda, ipiv, &work[0], &lwork, &info);
  return info;
}
inline
int cpplapack_sytrf(char uplo, int n, double* a, int lda, int* ipiv) {
  int info;
  double work_query;
  int lwork = -1;
  // Find out how much work memory required
  dsytrf_(&uplo, &n, a, &lda, ipiv, &work_query, &lwork, &info);
  lwork = static_cast(work_query);
  std::vector work(static_cast(lwork));
  // Do full calculation
  dsytrf_(&uplo, &n, a, &lda, ipiv, &work[0], &lwork, &info);
  return info;
}

// Invert a symmetric matrix; the work array has n elements
inline
int cpplapack_sytri(char uplo, int n, float* a, int lda, const int* ipiv) {
  int info;
  std::vector work(n);
  ssytri_(&uplo, &n, a, &lda, ipiv, &work[0], &info);
  return info;
}
inline
int
cpplapack_sytri(char uplo, int n, double* a, int lda, const int* ipiv) { int info; std::vector work(n); dsytri_(&uplo, &n, a, &lda, ipiv, &work[0], &info); return info; } // Solve system of linear equations with general matrix inline int cpplapack_gesv(int n, int nrhs, float* a, int lda, int* ipiv, float* b, int ldb) { int info; sgesv_(&n, &nrhs, a, &lda, ipiv, b, &lda, &info); return info; } inline int cpplapack_gesv(int n, int nrhs, double* a, int lda, int* ipiv, double* b, int ldb) { int info; dgesv_(&n, &nrhs, a, &lda, ipiv, b, &lda, &info); return info; } // Solve system of linear equations with symmetric matrix inline int cpplapack_sysv(char uplo, int n, int nrhs, float* a, int lda, int* ipiv, float* b, int ldb) { int info; float work_query; int lwork = -1; // Find out how much work memory required ssysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work_query, &lwork, &info); lwork = static_cast(work_query); std::vector work(static_cast(lwork)); // Do full calculation ssysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work[0], &lwork, &info); return info; } inline int cpplapack_sysv(char uplo, int n, int nrhs, double* a, int lda, int* ipiv, double* b, int ldb) { int info; double work_query; int lwork = -1; // Find out how much work memory required dsysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work_query, &lwork, &info); lwork = static_cast(work_query); std::vector work(static_cast(lwork)); // Do full calculation dsysv_(&uplo, &n, &nrhs, a, &lda, ipiv, b, &ldb, &work[0], &lwork, &info); return info; } } } #endif #endif ================================================ FILE: adept/index.cpp ================================================ /* index.cpp -- Definitions of "end" and "__" for array indexing Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Robin Hogan This file is part of the Adept library. 
*/

// NOTE(review): the target of the include directive below is missing
// from this copy of the file (it appears to have been lost in
// extraction) -- restore it from the repository.
#include

namespace adept {
  // The objects named "end" and "__" referred to in the file header
  ::adept::internal::EndIndex end;
  ::adept::internal::AllIndex __;
}

================================================
FILE: adept/inv.cpp
================================================
/* inv.cpp -- Invert matrices

    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
*/

// NOTE(review): the targets of the three include directives below,
// the template parameter list of inv() and the element type of the
// std::vector used for the pivots are missing from this copy of the
// file (they appear to have been lost in extraction) -- restore them
// from the repository.
#include #include #include

#ifndef AdeptSource_H
#include "cpplapack.h"
#endif

#ifdef HAVE_LAPACK

namespace adept {

using namespace internal;

// -------------------------------------------------------------------
// Invert general square matrix A
// -------------------------------------------------------------------
// Returns the inverse as a new array; A itself is not modified.
// Throws invalid_operation if A is not square and
// matrix_ill_conditioned if either LAPACK call returns a non-zero
// status.
template Array<2,Type,false> inv(const Array<2,Type,false>& A) {
  using internal::cpplapack_getrf;
  using internal::cpplapack_getri;
  if (A.dimension(0) != A.dimension(1)) {
    throw invalid_operation("Only square matrices can be inverted" ADEPT_EXCEPTION_LOCATION);
  }
  // Work on a copy, which is factorized and then inverted in place
  Array<2,Type,false> A_;
  // LAPACKE is more efficient with column-major input
  A_.resize_column_major(A.dimensions());
  A_ = A;
  std::vector ipiv(A_.dimension(0));
  // lapack_int status = LAPACKE_dgetrf(LAPACK_COL_MAJOR, A_.dimension(0), A_.dimension(1),
  //                                    A_.data(), A_.offset(1), &ipiv[0]);
  lapack_int status = cpplapack_getrf(A_.dimension(0), A_.data(), A_.offset(1), &ipiv[0]);
  if (status != 0) {
    std::stringstream s;
    s << "Failed to factorize matrix: LAPACK ?getrf returned code " << status;
    throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));
  }
  // status = LAPACKE_dgetri(LAPACK_COL_MAJOR, A_.dimension(0),
  //                         A_.data(), A_.offset(1), &ipiv[0]);
  status = cpplapack_getri(A_.dimension(0), A_.data(), A_.offset(1), &ipiv[0]);
  if (status != 0) {
    std::stringstream s;
    s << "Failed to invert matrix: LAPACK ?getri returned code " << status;
    throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));
  }
  return A_;
}

// -------------------------------------------------------------------
// Invert symmetric matrix A
// 
// -------------------------------------------------------------------
// NOTE(review): the template parameter lists and the template
// arguments of SpecialMatrix and std::vector are missing from this
// copy of the file (they appear to have been lost in extraction) --
// restore them from the repository.
// Returns the inverse as a new matrix; A itself is not modified.
// Throws matrix_ill_conditioned if either LAPACK call returns a
// non-zero status.
template SpecialMatrix,false> inv(const SpecialMatrix,false>& A) {
  using internal::cpplapack_sytrf;
  using internal::cpplapack_sytri;
  // Work on a copy, which is factorized and then inverted in place
  SpecialMatrix,false> A_;
  A_.resize(A.dimension());
  A_ = A;
  // Treat symmetric matrix as column-major
  char uplo;
  if (Orient == ROW_LOWER_COL_UPPER) {
    uplo = 'U';
  }
  else {
    uplo = 'L';
  }
  std::vector ipiv(A_.dimension(0));
  // lapack_int status = LAPACKE_dsytrf(LAPACK_COL_MAJOR, uplo, A_.dimension(),
  //                                    A_.data(), A_.offset(), &ipiv[0]);
  lapack_int status = cpplapack_sytrf(uplo, A_.dimension(), A_.data(), A_.offset(), &ipiv[0]);
  if (status != 0) {
    std::stringstream s;
    s << "Failed to factorize symmetric matrix: LAPACK ?sytrf returned code " << status;
    throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));
  }
  // status = LAPACKE_dsytri(LAPACK_COL_MAJOR, uplo, A_.dimension(),
  //                         A_.data(), A_.offset(), &ipiv[0]);
  status = cpplapack_sytri(uplo, A_.dimension(), A_.data(), A_.offset(), &ipiv[0]);
  if (status != 0) {
    std::stringstream s;
    s << "Failed to invert symmetric matrix: LAPACK ?sytri returned code " << status;
    throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));
  }
  return A_;
}

}

#else // LAPACK not available

// Without LAPACK the same functions are defined, but each one only
// throws feature_not_available

namespace adept {

using namespace internal;

// -------------------------------------------------------------------
// Invert general square matrix A
// -------------------------------------------------------------------
template Array<2,Type,false> inv(const Array<2,Type,false>& A) {
  throw feature_not_available("Cannot invert matrix because compiled without LAPACK");
}

// -------------------------------------------------------------------
// Invert symmetric matrix A
// -------------------------------------------------------------------
template SpecialMatrix,false> inv(const SpecialMatrix,false>& A) {
  throw feature_not_available("Cannot invert matrix because compiled without LAPACK");
}

}

#endif

namespace adept {
// 
// -------------------------------------------------------------------
// Explicit instantiations
// -------------------------------------------------------------------
// NOTE(review): the template arguments inside this macro are missing
// from this copy of the file (they appear to have been lost in
// extraction) -- restore them from the repository.
#define ADEPT_EXPLICIT_INV(TYPE) \
  template Array<2,TYPE,false> \
  inv(const Array<2,TYPE,false>& A); \
  template SpecialMatrix,false> \
  inv(const SpecialMatrix,false>&); \
  template SpecialMatrix,false> \
  inv(const SpecialMatrix,false>&)

ADEPT_EXPLICIT_INV(float);
ADEPT_EXPLICIT_INV(double);

#undef ADEPT_EXPLICIT_INV

}

================================================
FILE: adept/jacobian.cpp
================================================
/* jacobian.cpp -- Computation of Jacobian matrix

    Copyright (C) 2012-2014 University of Reading
    Copyright (C) 2015-2020 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
*/

// NOTE(review): the targets of the include directives below, the
// template parameter list of _check_long_double and the template
// arguments of Packet and Block are missing from this copy of the
// file (they appear to have been lost in extraction) -- restore them
// from the repository.
#ifdef _OPENMP
#include
#endif

#include

namespace adept {

namespace internal {
  // Number of gradient values stored per gradient index in the
  // multi-pass working array: ADEPT_MULTIPASS_SIZE when the packet
  // size is 1, otherwise the packet size itself
  static const int MULTIPASS_SIZE
    = ADEPT_REAL_PACKET_SIZE == 1 ? ADEPT_MULTIPASS_SIZE : ADEPT_REAL_PACKET_SIZE;
}

using namespace internal;

template T _check_long_double() {
  // The user may have requested Real to be of type "long double" by
  // specifying ADEPT_REAL_TYPE_SIZE=16. If the present system can
  // only support double then sizeof(long double) will be 8, but
  // Adept will not be emitting the best code for this, so it is
  // probably better to fail forcing the user to specify
  // ADEPT_REAL_TYPE_SIZE=8.
  ADEPT_STATIC_ASSERT(ADEPT_REAL_TYPE_SIZE != 16 || ADEPT_REAL_TYPE_SIZE == sizeof(Real),
                      COMPILER_DOES_NOT_SUPPORT_16_BYTE_LONG_DOUBLE);
  return 1;
}

// Forward kernel: gradient_multipass_b holds MULTIPASS_SIZE values
// for each gradient index, element i of gradient index g being at
// position g*MULTIPASS_SIZE+i.  For every statement after the first,
// the product of each operation's multiplier with the gradients of
// its index is accumulated and the sum written to the gradients of
// the statement's own index.
#if ADEPT_REAL_PACKET_SIZE > 1
void Stack::jacobian_forward_kernel(Real* __restrict gradient_multipass_b) const {
  // Loop forward through the derivative statements
  for (uIndex ist = 1; ist < n_statements_; ist++) {
    const Statement& statement = statement_[ist];
    // We copy the LHS to "a" in case it appears on the RHS in any
    // of the following statements
    Packet a; // Zeroed automatically
    // Loop through operations
    for (uIndex iop = statement_[ist-1].end_plus_one;
         iop < statement.end_plus_one; iop++) {
      Packet g(gradient_multipass_b+index_[iop]*MULTIPASS_SIZE);
      Packet m(multiplier_[iop]);
      a += m * g;
    }
    // Copy the results
    a.put(gradient_multipass_b+statement.index*MULTIPASS_SIZE);
  } // End of loop over statements
}
#else
void Stack::jacobian_forward_kernel(Real* __restrict gradient_multipass_b) const {
  // Loop forward through the derivative statements
  for (uIndex ist = 1; ist < n_statements_; ist++) {
    const Statement& statement = statement_[ist];
    // We copy the LHS to "a" in case it appears on the RHS in any
    // of the following statements
    Block a; // Zeroed automatically
    // Loop through operations
    for (uIndex iop = statement_[ist-1].end_plus_one;
         iop < statement.end_plus_one; iop++) {
      for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {
        a[i] += multiplier_[iop]*gradient_multipass_b[index_[iop]*MULTIPASS_SIZE+i];
      }
    }
    // Copy the results
    for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {
      gradient_multipass_b[statement.index*MULTIPASS_SIZE+i] = a[i];
    }
  } // End of loop over statements
}
#endif

// As the scalar kernel above, but only the first n_extra of the
// MULTIPASS_SIZE values of each gradient index are read and written;
// the spacing between gradient indices is still MULTIPASS_SIZE
void Stack::jacobian_forward_kernel_extra(Real* __restrict gradient_multipass_b,
                                          uIndex n_extra) const {
  // Loop forward through the derivative statements
  for (uIndex ist = 1; ist < n_statements_; ist++) {
    const Statement& statement = statement_[ist];
    // We copy the LHS to "a" in case it appears on the RHS in any
    // of the following statements
    Block a; // Zeroed automatically
    // Loop through operations
    for (uIndex iop = statement_[ist-1].end_plus_one;
         iop < statement.end_plus_one; iop++) {
      for (uIndex i = 0; i < n_extra; i++) {
        a[i] += multiplier_[iop]*gradient_multipass_b[index_[iop]*MULTIPASS_SIZE+i];
      }
    }
    // Copy the results
    for (uIndex i = 0; i < n_extra; i++) {
      gradient_multipass_b[statement.index*MULTIPASS_SIZE+i] = a[i];
    }
  } // End of loop over statements
}

// Compute the Jacobian matrix, parallelized using OpenMP. Normally
// the user would call the jacobian or jacobian_forward functions,
// and the OpenMP version would only be called if OpenMP is
// available and the Jacobian matrix is large enough for
// parallelization to be worthwhile. Note that jacobian_out must be
// allocated to be at least of size m*n, where m is the number of
// dependent variables and n is the number of independents. The
// independents and dependents must have already been identified
// with the functions "independent" and "dependent", otherwise this
// function will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. The
// offsets in memory of the two dimensions are provided by
// dep_offset and indep_offset. This is implemented using a forward
// pass, appropriate for m>=n.
void Stack::jacobian_forward_openmp(Real* jacobian_out,
                                    Index dep_offset, Index indep_offset) const
{
  // Number of blocks to cycle through, including a possible last
  // block containing fewer than MULTIPASS_SIZE variables
  int n_block = (n_independent() + MULTIPASS_SIZE - 1) / MULTIPASS_SIZE;
  // Number of independents in that last partial block (0 if the
  // number of independents is an exact multiple of MULTIPASS_SIZE)
  uIndex n_extra = n_independent() % MULTIPASS_SIZE;

#pragma omp parallel
  {
    // std::vector > gradient_multipass_b(max_gradient_);
    // The working array is allocated inside the parallel region, so
    // every thread executing the region gets its own copy, holding
    // MULTIPASS_SIZE values per gradient index
    uIndex gradient_multipass_size = max_gradient_*MULTIPASS_SIZE;
    Real* __restrict gradient_multipass_b
      = alloc_aligned(gradient_multipass_size);

#pragma omp for schedule(static)
    for (int iblock = 0; iblock < n_block; iblock++) {
      // Set the index to the first independent variable of this block
      uIndex i_independent = MULTIPASS_SIZE * iblock;

      uIndex block_size = MULTIPASS_SIZE;
      // If this is the last iteration and the number of extra
      // elements is non-zero, then set the block size to the number
      // of extra elements. If the number of extra elements is zero,
      // then the number of independent variables is exactly divisible
      // by MULTIPASS_SIZE, so the last iteration will be the
      // same as all the rest.
      if (iblock == n_block-1 && n_extra > 0) {
        block_size = n_extra;
      }

      // Set the initial gradients all to zero
      for (uIndex i = 0; i < gradient_multipass_size; i++) {
        gradient_multipass_b[i] = 0.0;
      }
      // Each seed vector has one non-zero entry of 1.0
      for (uIndex i = 0; i < block_size; i++) {
        gradient_multipass_b[independent_index_[i_independent+i]*MULTIPASS_SIZE+i] = 1.0;
      }

      // The full-width kernel is used even for a partial last block;
      // only the first block_size entries are copied out below
      jacobian_forward_kernel(gradient_multipass_b);

      // Copy the gradients corresponding to the dependent variables
      // into the Jacobian matrix
      if (indep_offset == 1) {
        // Unit stride along the independent dimension
        for (uIndex idep = 0; idep < n_dependent(); idep++) {
          for (uIndex i = 0; i < block_size; i++) {
            jacobian_out[idep*dep_offset+i_independent+i]
              = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];
          }
        }
      }
      else {
        // General strides in both dimensions
        for (uIndex idep = 0; idep < n_dependent(); idep++) {
          for (uIndex i = 0; i < block_size; i++) {
            jacobian_out[(i_independent+i)*indep_offset+idep*dep_offset]
              = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];
          }
        }
      }
    } // End of loop over blocks
    free_aligned(gradient_multipass_b);
  } // End of parallel section
} // End of jacobian function


// Compute the Jacobian matrix; note that jacobian_out must be
// allocated to be of size m*n, where m is the number of dependent
// variables and n is the number of independents. The independents
// and dependents must have already been identified with the
// functions "independent" and "dependent", otherwise this function
// will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. This is
// implemented using a forward pass, appropriate for m>=n.
// Throws dependents_or_independents_not_identified if either index
// list is empty. jacobian_out is addressed as
// jacobian_out[idep*dep_offset + iindep*indep_offset].
void Stack::jacobian_forward(Real* jacobian_out,
                             Index dep_offset, Index indep_offset) const
{
  if (independent_index_.empty() || dependent_index_.empty()) {
    throw(dependents_or_independents_not_identified());
  }

  // If either of the offsets are zero (or negative), set them to the
  // size of the other dimension, which assumes that the full Jacobian
  // matrix is contiguous in memory.
  if (dep_offset <= 0) {
    dep_offset = n_independent();
  }
  if (indep_offset <= 0) {
    indep_offset = n_dependent();
  }

#ifdef _OPENMP
  // Only hand over to the parallel version when there is more than
  // one block of columns and more than one thread to share them
  if (have_openmp_
      && !openmp_manually_disabled_
      && n_independent() > MULTIPASS_SIZE
      && omp_get_max_threads() > 1) {
    // Call the parallel version
    jacobian_forward_openmp(jacobian_out, dep_offset, indep_offset);
    return;
  }
#endif

  // For optimization reasons, we process a block of
  // MULTIPASS_SIZE columns of the Jacobian at once; calculate
  // how many blocks are needed and how many extras will remain
  uIndex n_block = n_independent() / MULTIPASS_SIZE;
  uIndex n_extra = n_independent() % MULTIPASS_SIZE;

  ///gradient_multipass_.resize(max_gradient_);
  // Working array holding MULTIPASS_SIZE values per gradient index;
  // released by free_aligned at the end of this function
  uIndex gradient_multipass_size = max_gradient_*MULTIPASS_SIZE;
  Real* __restrict gradient_multipass_b
    = alloc_aligned(gradient_multipass_size);

  // Loop over blocks of MULTIPASS_SIZE columns
  for (uIndex iblock = 0; iblock < n_block; iblock++) {
    // Set the index to the first independent variable of this block
    uIndex i_independent = MULTIPASS_SIZE * iblock;

    // Set the initial gradients all to zero
    ///zero_gradient_multipass();
    for (uIndex i = 0; i < gradient_multipass_size; i++) {
      gradient_multipass_b[i] = 0.0;
    }

    // Each seed vector has one non-zero entry of 1.0
    for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {
      gradient_multipass_b[independent_index_[i_independent+i]*MULTIPASS_SIZE+i] = 1.0;
    }

    jacobian_forward_kernel(gradient_multipass_b);

    // Copy the gradients corresponding to the dependent variables
    // into the Jacobian matrix
    if (indep_offset == 1) {
      for (uIndex idep = 0; idep < n_dependent(); idep++) {
        for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {
          jacobian_out[idep*dep_offset+i_independent+i]
            = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];
        }
      }
    }
    else {
      for (uIndex idep = 0; idep < n_dependent(); idep++) {
        for (uIndex i = 0; i < MULTIPASS_SIZE; i++) {
          jacobian_out[(i_independent+i)*indep_offset+idep*dep_offset]
            = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];
        }
      }
    }
  } // End of loop over blocks

  // Now do the same but for the remaining few columns in the matrix
  if (n_extra > 0) {
    // First independent of the partial block follows the full blocks
    uIndex i_independent = MULTIPASS_SIZE * n_block;
    ///zero_gradient_multipass();
    for (uIndex i = 0; i < gradient_multipass_size; i++) {
      gradient_multipass_b[i] = 0.0;
    }

    for (uIndex i = 0; i < n_extra; i++) {
      gradient_multipass_b[independent_index_[i_independent+i]*MULTIPASS_SIZE+i] = 1.0;
    }

    // Kernel variant that touches only the first n_extra entries of
    // each gradient slot
    jacobian_forward_kernel_extra(gradient_multipass_b, n_extra);

    if (indep_offset == 1) {
      for (uIndex idep = 0; idep < n_dependent(); idep++) {
        for (uIndex i = 0; i < n_extra; i++) {
          jacobian_out[idep*dep_offset+i_independent+i]
            = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];
        }
      }
    }
    else {
      for (uIndex idep = 0; idep < n_dependent(); idep++) {
        for (uIndex i = 0; i < n_extra; i++) {
          jacobian_out[(i_independent+i)*indep_offset+idep*dep_offset]
            = gradient_multipass_b[dependent_index_[idep]*MULTIPASS_SIZE+i];
        }
      }
    }
  }

  free_aligned(gradient_multipass_b);
}


// Compute the Jacobian matrix, parallelized using OpenMP. Normally
// the user would call the jacobian or jacobian_reverse functions,
// and the OpenMP version would only be called if OpenMP is
// available and the Jacobian matrix is large enough for
// parallelization to be worthwhile. Note that jacobian_out must be
// allocated to be at least of size m*n, where m is the number of
// dependent variables and n is the number of independents. The
// independents and dependents must have already been identified
// with the functions "independent" and "dependent", otherwise this
// function will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. The
// offsets in memory of the two dimensions are provided by
// dep_offset and indep_offset.
This is implemented using a reverse // pass, appropriate for m > gradient_multipass_b(max_gradient_); #pragma omp for schedule(static) for (int iblock = 0; iblock < n_block; iblock++) { // Set the index to the dependent variables for this block uIndex i_dependent = MULTIPASS_SIZE * iblock; uIndex block_size = MULTIPASS_SIZE; // If this is the last iteration and the number of extra // elements is non-zero, then set the block size to the number // of extra elements. If the number of extra elements is zero, // then the number of independent variables is exactly divisible // by MULTIPASS_SIZE, so the last iteration will be the // same as all the rest. if (iblock == n_block-1 && n_extra > 0) { block_size = n_extra; } // Set the initial gradients all to zero for (std::size_t i = 0; i < gradient_multipass_b.size(); i++) { gradient_multipass_b[i].zero(); } // Each seed vector has one non-zero entry of 1.0 for (uIndex i = 0; i < block_size; i++) { gradient_multipass_b[dependent_index_[i_dependent+i]][i] = 1.0; } // Loop backward through the derivative statements for (uIndex ist = n_statements_-1; ist > 0; ist--) { const Statement& statement = statement_[ist]; // We copy the RHS to "a" in case it appears on the LHS in any // of the following statements Real a[MULTIPASS_SIZE]; #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK // For large blocks, we only process the ones where a[i] is // non-zero uIndex i_non_zero[MULTIPASS_SIZE]; #endif uIndex n_non_zero = 0; for (uIndex i = 0; i < block_size; i++) { a[i] = gradient_multipass_b[statement.index][i]; gradient_multipass_b[statement.index][i] = 0.0; if (a[i] != 0.0) { #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK i_non_zero[n_non_zero++] = i; #else n_non_zero = 1; #endif } } // Only do anything for this statement if any of the a values // are non-zero if (n_non_zero) { // Loop through the operations for (uIndex iop = statement_[ist-1].end_plus_one; iop < statement.end_plus_one; iop++) { // Try to minimize pointer dereferencing by 
making local // copies Real multiplier = multiplier_[iop]; Real* __restrict gradient_multipass = &(gradient_multipass_b[index_[iop]][0]); #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK // For large blocks, loop over only the indices // corresponding to non-zero a for (uIndex i = 0; i < n_non_zero; i++) { gradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]]; } #else // For small blocks, do all indices for (uIndex i = 0; i < block_size; i++) { // for (uIndex i = 0; i < MULTIPASS_SIZE; i++) { gradient_multipass[i] += multiplier*a[i]; } #endif } } } // End of loop over statement // Copy the gradients corresponding to the independent // variables into the Jacobian matrix if (dep_offset == 1) { for (uIndex iindep = 0; iindep < n_independent(); iindep++) { for (uIndex i = 0; i < block_size; i++) { jacobian_out[iindep*indep_offset+i_dependent+i] = gradient_multipass_b[independent_index_[iindep]][i]; } } } else { for (uIndex iindep = 0; iindep < n_independent(); iindep++) { for (uIndex i = 0; i < block_size; i++) { jacobian_out[iindep*indep_offset+(i_dependent+i)*dep_offset] = gradient_multipass_b[independent_index_[iindep]][i]; } } } } // End of loop over blocks } // end #pragma omp parallel } // end jacobian_reverse_openmp // Compute the Jacobian matrix; note that jacobian_out must be // allocated to be of size m*n, where m is the number of dependent // variables and n is the number of independents. The independents // and dependents must have already been identified with the // functions "independent" and "dependent", otherwise this function // will fail with FAILURE_XXDEPENDENT_NOT_IDENTIFIED. 
This is // implemented using a reverse pass, appropriate for m MULTIPASS_SIZE && omp_get_max_threads() > 1) { // Call the parallel version jacobian_reverse_openmp(jacobian_out, dep_offset, indep_offset); return; } #endif // gradient_multipass_.resize(max_gradient_); std::vector > gradient_multipass_b(max_gradient_); // For optimization reasons, we process a block of // MULTIPASS_SIZE rows of the Jacobian at once; calculate // how many blocks are needed and how many extras will remain uIndex n_block = n_dependent() / MULTIPASS_SIZE; uIndex n_extra = n_dependent() % MULTIPASS_SIZE; uIndex i_dependent = 0; // uIndex of first row in the block we are // currently computing // Loop over the of MULTIPASS_SIZE rows for (uIndex iblock = 0; iblock < n_block; iblock++) { // Set the initial gradients all to zero // zero_gradient_multipass(); for (std::size_t i = 0; i < gradient_multipass_b.size(); i++) { gradient_multipass_b[i].zero(); } // Each seed vector has one non-zero entry of 1.0 for (uIndex i = 0; i < MULTIPASS_SIZE; i++) { gradient_multipass_b[dependent_index_[i_dependent+i]][i] = 1.0; } // Loop backward through the derivative statements for (uIndex ist = n_statements_-1; ist > 0; ist--) { const Statement& statement = statement_[ist]; // We copy the RHS to "a" in case it appears on the LHS in any // of the following statements Real a[MULTIPASS_SIZE]; #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK // For large blocks, we only process the ones where a[i] is // non-zero uIndex i_non_zero[MULTIPASS_SIZE]; #endif uIndex n_non_zero = 0; for (uIndex i = 0; i < MULTIPASS_SIZE; i++) { a[i] = gradient_multipass_b[statement.index][i]; gradient_multipass_b[statement.index][i] = 0.0; if (a[i] != 0.0) { #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK i_non_zero[n_non_zero++] = i; #else n_non_zero = 1; #endif } } // Only do anything for this statement if any of the a values // are non-zero if (n_non_zero) { // Loop through the operations for (uIndex iop = 
statement_[ist-1].end_plus_one; iop < statement.end_plus_one; iop++) { // Try to minimize pointer dereferencing by making local // copies Real multiplier = multiplier_[iop]; Real* __restrict gradient_multipass = &(gradient_multipass_b[index_[iop]][0]); #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK // For large blocks, loop over only the indices // corresponding to non-zero a for (uIndex i = 0; i < n_non_zero; i++) { gradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]]; } #else // For small blocks, do all indices for (uIndex i = 0; i < MULTIPASS_SIZE; i++) { gradient_multipass[i] += multiplier*a[i]; } #endif } } } // End of loop over statement // Copy the gradients corresponding to the independent variables // into the Jacobian matrix if (dep_offset == 1) { for (uIndex iindep = 0; iindep < n_independent(); iindep++) { for (uIndex i = 0; i < MULTIPASS_SIZE; i++) { jacobian_out[iindep*indep_offset+i_dependent+i] = gradient_multipass_b[independent_index_[iindep]][i]; } } } else { for (uIndex iindep = 0; iindep < n_independent(); iindep++) { for (uIndex i = 0; i < MULTIPASS_SIZE; i++) { jacobian_out[iindep*indep_offset+(i_dependent+i)*dep_offset] = gradient_multipass_b[independent_index_[iindep]][i]; } } } i_dependent += MULTIPASS_SIZE; } // End of loop over blocks // Now do the same but for the remaining few rows in the matrix if (n_extra > 0) { for (std::size_t i = 0; i < gradient_multipass_b.size(); i++) { gradient_multipass_b[i].zero(); } // zero_gradient_multipass(); for (uIndex i = 0; i < n_extra; i++) { gradient_multipass_b[dependent_index_[i_dependent+i]][i] = 1.0; } for (uIndex ist = n_statements_-1; ist > 0; ist--) { const Statement& statement = statement_[ist]; Real a[MULTIPASS_SIZE]; #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK uIndex i_non_zero[MULTIPASS_SIZE]; #endif uIndex n_non_zero = 0; for (uIndex i = 0; i < n_extra; i++) { a[i] = gradient_multipass_b[statement.index][i]; gradient_multipass_b[statement.index][i] = 0.0; if (a[i] != 0.0) { 
#if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK i_non_zero[n_non_zero++] = i; #else n_non_zero = 1; #endif } } if (n_non_zero) { for (uIndex iop = statement_[ist-1].end_plus_one; iop < statement.end_plus_one; iop++) { Real multiplier = multiplier_[iop]; Real* __restrict gradient_multipass = &(gradient_multipass_b[index_[iop]][0]); #if MULTIPASS_SIZE > MULTIPASS_SIZE_ZERO_CHECK for (uIndex i = 0; i < n_non_zero; i++) { gradient_multipass[i_non_zero[i]] += multiplier*a[i_non_zero[i]]; } #else for (uIndex i = 0; i < n_extra; i++) { gradient_multipass[i] += multiplier*a[i]; } #endif } } } if (dep_offset == 1) { for (uIndex iindep = 0; iindep < n_independent(); iindep++) { for (uIndex i = 0; i < n_extra; i++) { jacobian_out[iindep*indep_offset+i_dependent+i] = gradient_multipass_b[independent_index_[iindep]][i]; } } } else { for (uIndex iindep = 0; iindep < n_independent(); iindep++) { for (uIndex i = 0; i < n_extra; i++) { jacobian_out[iindep*indep_offset+(i_dependent+i)*dep_offset] = gradient_multipass_b[independent_index_[iindep]][i]; } } } } } // Return the Jacobian matrix in the matrix "jac", using the forward // or reverse method depending which would be faster void Stack::jacobian(Array<2,Real,false> jac) const { if (jac.dimension(0) != n_dependent() || jac.dimension(1) != n_independent()) { throw size_mismatch("Jacobian matrix has wrong size"); } if (n_independent() <= n_dependent()) { jacobian_forward(jac.data(), jac.offset(0), jac.offset(1)); } else { jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1)); } } // Return the Jacobian matrix in the matrix "jac", explicitly // specifying whether to use the forward or reverse method void Stack::jacobian_forward(Array<2,Real,false> jac) const { if (jac.dimension(0) != n_dependent() || jac.dimension(1) != n_independent()) { throw size_mismatch("Jacobian matrix has wrong size"); } jacobian_forward(jac.data(), jac.offset(0), jac.offset(1)); } void Stack::jacobian_reverse(Array<2,Real,false> jac) const { if 
(jac.dimension(0) != n_dependent() || jac.dimension(1) != n_independent()) { throw size_mismatch("Jacobian matrix has wrong size"); } jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1)); } // Return the Jacobian matrix using the forward or reverse method // depending which would be faster Array<2,Real,false> Stack::jacobian() const { Array<2,Real,false> jac(n_dependent(), n_independent()); if (n_independent() <= n_dependent()) { jacobian_forward(jac.data(), jac.offset(0), jac.offset(1)); } else { jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1)); } return jac; } // Return the Jacobian matrix, explicitly specifying whether to use // the forward or reverse method Array<2,Real,false> Stack::jacobian_forward() const { Array<2,Real,false> jac(n_dependent(), n_independent()); jacobian_forward(jac.data(), jac.offset(0), jac.offset(1)); return jac; } Array<2,Real,false> Stack::jacobian_reverse() const { Array<2,Real,false> jac(n_dependent(), n_independent()); jacobian_reverse(jac.data(), jac.offset(0), jac.offset(1)); return jac; } } // End namespace adept ================================================ FILE: adept/line_search.cpp ================================================ /* line_search.cpp -- Approximate minimization of function along a line Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include #include #include namespace adept { // Compute the cost function "cf" and gradient vector "gradient", // along with the scalar gradient "grad" in the search direction // "direction" (normalized with "dir_scaling"), from the state // vector "x" plus a step "step_size" in the search direction. If // the resulting cost function and gradient satisfy the Wolfe // conditions for sufficient convergence, copy the new state vector // to "x" and the step size to "final_step_size", and return // MINIMIZER_STATUS_SUCCESS. Otherwise, return // MINIMIZER_STATUS_NOT_YET_CONVERGED. 
Error conditions // MINIMIZER_STATUS_INVALID_COST_FUNCTION and // MINIMIZER_STATUS_INVALID_GRADIENT are also possible. MinimizerStatus Minimizer::line_search_gradient_check( Optimizable& optimizable, // Object defining function to be minimized Vector x, // Initial and returned state vector const Vector& direction, // Un-normalized search direction Vector test_x, // Test state vector (working memory) Real& final_step_size, // Returned step size if converged Vector gradient, // Gradient vector int& state_up_to_date, // Is state up-to-date? Real step_size, // Candidate step size Real grad0, // Gradient in direction at start of line search Real dir_scaling, // Scaling of direction vector Real& cf, // Returned cost function Real& grad, // Returned gradient in direction Real curvature_coeff) // Factor by which gradient should reduce (0-1) { test_x = x + (step_size * dir_scaling) * direction; cf = optimizable.calc_cost_function_gradient(test_x, gradient); ++n_samples_; state_up_to_date = -1; // Check cost function and gradient are finite if (!std::isfinite(cf)) { return MINIMIZER_STATUS_INVALID_COST_FUNCTION; } else if (any(!isfinite(gradient))) { return MINIMIZER_STATUS_INVALID_GRADIENT; } // Calculate gradient in search direction grad = dot_product(direction, gradient) * dir_scaling; // Check Wolfe conditions if (cf <= cost_function_ + armijo_coeff_*step_size*grad0 // Armijo condition && std::fabs(grad) <= -curvature_coeff*grad0) { // Curvature condition x = test_x; final_step_size = step_size; cost_function_ = cf; state_up_to_date = 1; return MINIMIZER_STATUS_SUCCESS; } else { return MINIMIZER_STATUS_NOT_YET_CONVERGED; } } // Perform line search starting at state vector "x" with gradient // vector "gradient", and initial step "step_size" in un-normalized // direction "direction". 
Successful minimization of the function // (according to Wolfe conditions) will lead to // MINIMIZER_STATUS_SUCCESS being returned, the new state stored in // "x", and if state_up_to_date >= 1 then the gradient stored in // "gradient". Other possible return values are // MINIMIZER_STATUS_FAILED_TO_CONVERGE and // MINIMIZER_STATUS_DIRECTION_UPHILL if the initial direction points // uphill, or MINIMIZER_STATUS_INVALID_COST_FUNCTION, // MINIMIZER_STATUS_INVALID_GRADIENT or // MINIMIZER_STATUS_BOUND_REACHED. First the minimum is bracketed, // then a cubic polynomial is fitted to the values and gradients of // the function at the two points in order to select the next test // point. MinimizerStatus Minimizer::line_search( Optimizable& optimizable, // Object defining function to be minimized Vector x, // Initial and returned state vector const Vector& direction, // Un-normalized search direction Vector test_x, // Test state vector (working memory) Real& step_size, // Initial and final step size Vector gradient, // Initial and possibly final gradient int& state_up_to_date, // 1 if gradient up-to-date, -1 otherwise Real curvature_coeff, // Factor by which gradient should reduce (0-1) Real bound_step_size) // Maximum step until bound is reached (-1 for no bound) { Real dir_scaling = 1.0 / norm2(direction); // Numerical suffixes to variables indicate different locations // along the line: // 0 = initial point of line search, constant within this function // 1 = point at which gradient has been calculated (initially the same as 0) // 2 = test point // 3 = test point // Step sizes const Real ss0 = 0.0; Real ss1 = ss0; Real ss2 = step_size; Real ss3; // Gradients in search direction Real grad0 = dot_product(direction, gradient) * dir_scaling; Real grad1 = grad0; Real grad2, grad3; // Cost function values Real cf0 = cost_function_; Real cf1 = cf0; Real cf2, cf3; int iterations_remaining = max_line_search_iterations_; bool is_bound_step = (bound_step_size > 0.0); bool at_bound = 
false; if (grad0 >= 0.0) { return MINIMIZER_STATUS_DIRECTION_UPHILL; } // Check initial step size is within bounds if (max_step_size_ > 0.0 && ss2 > max_step_size_) { ss2 = max_step_size_; } if (is_bound_step && ss2 >= bound_step_size) { ss2 = bound_step_size; at_bound = true; } // First step: bound the minimum while (iterations_remaining > 0) { MinimizerStatus status = line_search_gradient_check(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, ss2, grad0, dir_scaling, cf2, grad2, curvature_coeff); if (status == MINIMIZER_STATUS_SUCCESS) { if (at_bound) { status = MINIMIZER_STATUS_BOUND_REACHED; } return status; } else if (status != MINIMIZER_STATUS_NOT_YET_CONVERGED) { // Cost function or its gradient not finite: revert to // previous step step_size = cf1; if (cf1 > 0.0) { x += (ss1 * dir_scaling) * direction; } state_up_to_date = 0; return status; } if (grad2 > 0.0 || cf2 >= cf1) { // Positive gradient or cost function increase -> bounded // between points 1 and 2 break; } else if (at_bound) { // The cost function has been reduced but we are already at // the maximum step size and the gradient points towards it: // make this point the solution x += (ss2 * dir_scaling) * direction; step_size = ss2; cost_function_ = cf2; state_up_to_date = 1; return MINIMIZER_STATUS_BOUND_REACHED; } else { // Reduced cost function but not yet bounded -> look further // ahead Real new_step; if (cf1 > cf2+grad2*(ss1-ss2)) { // Positive curvature: fit a quadratic Real curvature = 2.0*(cf1-cf2-grad2*(ss1-ss2))/((ss1-ss2)*(ss1-ss2)); new_step = ss2-grad2/curvature; // Newton's method // Bounds on actual step size new_step = std::max(ss1+1.1*(ss2-ss1), std::min(new_step, ss1+10.0*(ss2-ss1))); if (max_step_size_ > 0.0 && new_step-ss2 > max_step_size_) { new_step = ss2 + max_step_size_; } } else { // Cliff gets steeper... 
simply jump ahead a lot more new_step = ss2 + 5.0*(ss2-ss1); if (max_step_size_ > 0.0 && new_step-ss2 > max_step_size_) { new_step = ss2 + max_step_size_; } } ss1 = ss2; cf1 = cf2; grad1 = grad2; ss2 = new_step; if (is_bound_step && ss2 >= bound_step_size) { ss2 = bound_step_size; at_bound = true; } } } // Second step: reduce the bounds until we get sufficiently close // to the minimum while (iterations_remaining > 0) { if (ss2 <= ss1) { // Two points are identical! if (cf1 < cf0) { // Return value at point 1 x += (ss1 * dir_scaling) * direction; step_size = ss1; cost_function_ = cf1; return MINIMIZER_STATUS_SUCCESS; } else { // Cost function did not decrease at all return MINIMIZER_STATUS_FAILED_TO_CONVERGE; } } // Minimizer of cubic function Real step_diff = ss2-ss1; Real theta = (cf1-cf2) * 3.0 / step_diff + grad1 + grad2; Real max_grad = std::max(std::fabs(theta), std::max(std::fabs(grad1), std::fabs(grad2))); Real scaled_theta = theta / max_grad; Real gamma = max_grad * std::sqrt(scaled_theta*scaled_theta - (grad1/max_grad) * (grad2/max_grad)); ss3 = ss1 + ((gamma - grad1 + theta) / (2.0*gamma + grad2 - grad1)) * step_diff; // Bound the step size to be at least 5% away from each end ss3 = std::max(0.95*ss1+0.05*ss2, std::min(0.05*ss1+0.95*ss2, ss3)); MinimizerStatus status = line_search_gradient_check(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, ss3, grad0, dir_scaling, cf3, grad3, curvature_coeff); if (status == MINIMIZER_STATUS_SUCCESS) { return status; } else if (status != MINIMIZER_STATUS_NOT_YET_CONVERGED) { // Cost function or its gradient not finite: revert to // previous step step_size = cf1; if (cf1 > 0.0) { x += (ss1 * dir_scaling) * direction; } state_up_to_date = 0; return status; } if (grad3 > 0.0) { // Positive gradient -> bounded between points 1 and 3 ss2 = ss3; cf2 = cf3; grad2 = grad3; } else if (cf3 < cf1) { // Reduced cost function, negative gradient ss1 = ss3; cf1 = cf3; grad1 = grad3; } else { // Increased 
cost function, negative gradient ss2 = ss3; cf2 = cf3; grad2 = grad3; } --iterations_remaining; } // Maximum iterations reached: check if cost function has been // reduced at all state_up_to_date = -1; if (cf2 < cf1) { // Return value at point 2 x += (ss2 * dir_scaling) * direction; step_size = ss2; cost_function_ = cf2; } else if (cf1 < cf0) { // Return value at point 1 x += (ss1 * dir_scaling) * direction; step_size = ss1; cost_function_ = cf1; } else { // Cost function did not decrease at all return MINIMIZER_STATUS_FAILED_TO_CONVERGE; } // Cost function decreased return MINIMIZER_STATUS_SUCCESS; } } ================================================ FILE: adept/minimize_conjugate_gradient.cpp ================================================ /* minimize_conjugate_gradient.cpp -- Minimize function using Conjugate Gradient algorithm Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include #include #include namespace adept { // Minimize the cost function embodied in "optimizable" using the // Conjugate-Gradient algorithm, where "x" is the initial state // vector and also where the solution is stored. By default the // Polak-Ribiere method is used to compute the new search direction, // but Fletcher-Reeves is also available. MinimizerStatus Minimizer::minimize_conjugate_gradient(Optimizable& optimizable, Vector x, bool use_fletcher_reeves) { int nx = x.size(); // Initial values n_iterations_ = 0; n_samples_ = 0; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; cost_function_ = std::numeric_limits::infinity(); // The Conjugate-Gradient method is the most efficient // gradient-based method in terms of memory usage, requiring a // working memory of just 4*nx, making it suitable for large state // vectors. 
Vector gradient(nx); Vector previous_gradient(nx); Vector direction(nx); Vector test_x(nx); // Used by the line search only // Does the last calculation of the cost function in "optimizable" // match the current contents of the state vector x? -1=no, 0=yes, // 1=yes and the last calculation included the gradient, 2=yes and // the last calculation included gradient and Hessian. int state_up_to_date = -1; // Initial step size Real step_size = 1.0; if (max_step_size_ > 0.0) { step_size = max_step_size_; } // A restart is performed every nx+1 iterations bool do_restart = true; int iteration_at_last_restart = n_iterations_; // Main loop while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) { // If the last line search found a minimum along the lines // satisfying the Wolfe conditions, then the current cost // function and gradient will be consistent with the current // state vector. Otherwise we need to compute them. if (state_up_to_date < 1) { cost_function_ = optimizable.calc_cost_function_gradient(x, gradient); state_up_to_date = 1; ++n_samples_; } if (n_iterations_ == 0) { start_cost_function_ = cost_function_; } // Check cost function and gradient are finite if (!std::isfinite(cost_function_)) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; break; } else if (any(!isfinite(gradient))) { status_ = MINIMIZER_STATUS_INVALID_GRADIENT; break; } // Compute L2 norm of gradient to see how "flat" the environment // is gradient_norm_ = norm2(gradient); // Report progress using user-defined function optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_); // Convergence has been achieved if the L2 norm has been reduced // to a user-specified threshold if (gradient_norm_ <= converged_gradient_norm_) { status_ = MINIMIZER_STATUS_SUCCESS; break; } // Restart every nx+1 iterations if (n_iterations_ - iteration_at_last_restart > nx) { do_restart = true; } // Find search direction if (do_restart) { // Simple gradient descent after a restart direction = 
-gradient; do_restart = false; iteration_at_last_restart = n_iterations_; } else { // The brains of the Conjugate-Gradient method - note that // generally the Polak-Ribiere method is believed to be // superior to Fletcher-Reeves Real beta; if (use_fletcher_reeves) { // Fletcher-Reeves method beta = dot_product(gradient, gradient) / dot_product(previous_gradient, previous_gradient); } else { // Default: Polak-Ribiere method beta = std::max(sum(gradient * (gradient - previous_gradient)) / dot_product(previous_gradient, previous_gradient), 0.0); } // beta==0 is equivalent to gradient descent (i.e. a restart) if (beta <= 0) { iteration_at_last_restart = n_iterations_; } // Compute new direction direction = beta*direction - gradient; } // Store gradient for computing beta in next iteration previous_gradient = gradient; // Perform line search, storing new state vector in x MinimizerStatus ls_status = line_search(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, cg_curvature_coeff_); if (ls_status == MINIMIZER_STATUS_SUCCESS) { // Successfully minimized along search direction: continue to // next iteration status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; } else if (iteration_at_last_restart != n_iterations_) { // Line search either made no progress or encountered a // non-finite cost function or gradient, and this was not a // restart; try restarting once do_restart = true; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; } else { // Unrecoverable failure in line-search: return status to // calling function status_ = ls_status; } // Better convergence if first step size on next line search is // larger than the actual step size on the last line search step_size *= 2.0; ++n_iterations_; if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED && n_iterations_ >= max_iterations_) { status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED; } // End of main loop: if status_ is anything other than // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations // are 
performed } if (state_up_to_date < ensure_updated_state_) { // The last call to calc_cost_function* was not with the state // vector returned to the user, and they want it to be. if (ensure_updated_state_ > 0) { // User wants at least the first derivative cost_function_ = optimizable.calc_cost_function_gradient(x, gradient); } else { // User does not need derivatives to have been computed cost_function_ = optimizable.calc_cost_function(x); } } return status_; } // Minimize the cost function embodied in "optimizable" using the // Conjugate-Gradient algorithm, where "x" is the initial state // vector and also where the solution is stored, subject to the // constraint that x lies between min_x and max_x. By default the // Polak-Ribiere method is used to compute the new search direction, // but Fletcher-Reeves is also available. MinimizerStatus Minimizer::minimize_conjugate_gradient_bounded(Optimizable& optimizable, Vector x, const Vector& min_x, const Vector& max_x, bool use_fletcher_reeves) { if (any(min_x >= max_x) || min_x.size() != x.size() || max_x.size() != x.size()) { return MINIMIZER_STATUS_INVALID_BOUNDS; } int nx = x.size(); // Initial values n_iterations_ = 0; n_samples_ = 0; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; cost_function_ = std::numeric_limits::infinity(); // The Conjugate-Gradient method is the most efficient // gradient-based method in terms of memory usage, requiring a // working memory of just 4*nx, making it suitable for large state // vectors. Vector gradient(nx); Vector previous_gradient(nx); Vector direction(nx); Vector test_x(nx); // Used by the line search only // Which state variables are at the minimum bound (-1), maximum // bound (1) or free (0)? 
intVector bound_status(nx); bound_status = 0; // Ensure that initial x lies within the specified bounds bound_status.where(x >= max_x) = 1; bound_status.where(x <= min_x) = -1; x = max(min_x, min(x, max_x)); int nbound = count(bound_status != 0); int nfree = nx - nbound; // Floating-point number containing 1.0 if unbound and 0.0 if // bound Vector unbound_status(nx); unbound_status = 1.0-fabs(bound_status); // Does the last calculation of the cost function in "optimizable" // match the current contents of the state vector x? -1=no, 0=yes, // 1=yes and the last calculation included the gradient, 2=yes and // the last calculation included gradient and Hessian. int state_up_to_date = -1; // Initial step size Real step_size = 1.0; if (max_step_size_ > 0.0) { step_size = max_step_size_; } // A restart is performed every nx+1 iterations bool do_restart = true; int iteration_at_last_restart = n_iterations_; // Main loop while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) { // If the last line search found a minimum along the lines // satisfying the Wolfe conditions, then the current cost // function and gradient will be consistent with the current // state vector. Otherwise we need to compute them. 
if (state_up_to_date < 1) { cost_function_ = optimizable.calc_cost_function_gradient(x, gradient); state_up_to_date = 1; ++n_samples_; if (n_iterations_ == 0) { start_cost_function_ = cost_function_; } // Check cost function and gradient are finite if (!std::isfinite(cost_function_)) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; break; } else if (any(!isfinite(gradient))) { status_ = MINIMIZER_STATUS_INVALID_GRADIENT; break; } } // Check whether the bound status of each state variable is // consistent with the gradient if a steepest descent were to be // taken, and if not flag a restart if (any(bound_status == -1 && gradient < 0.0) || any(bound_status == 1 && gradient > 0.0)) { bound_status.where(bound_status == -1 && gradient < 0.0) = 0; bound_status.where(bound_status == 1 && gradient > 0.0) = 0; unbound_status = 1.0-fabs(bound_status); do_restart = true; } nbound = count(bound_status != 0); nfree = nx - nbound; // Set gradient at bound points to zero gradient.where(bound_status != 0) = 0.0; // Compute L2 norm of gradient to see how "flat" the environment // is if (nfree > 0) { gradient_norm_ = norm2(gradient); } else { // If no dimensions are in play we are at a corner of the // bounds and the gradient is pointing into the corner: we // have reached a minimum in the cost function subject to the // bounds so have converged gradient_norm_ = 0.0; } // Report progress using user-defined function optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_); // Convergence has been achieved if the L2 norm has been reduced // to a user-specified threshold if (gradient_norm_ <= converged_gradient_norm_) { status_ = MINIMIZER_STATUS_SUCCESS; break; } // Restart every nx+1 iterations if (n_iterations_ - iteration_at_last_restart > nx) { do_restart = true; } // Find search direction if (do_restart) { // Simple gradient descent after a restart direction = -gradient; do_restart = false; iteration_at_last_restart = n_iterations_; } else { // The brains 
of the Conjugate-Gradient method - note that // generally the Polak-Ribiere method is believed to be // superior to Fletcher-Reeves Real beta; if (use_fletcher_reeves) { // Fletcher-Reeves method beta = dot_product(gradient, gradient) / dot_product(previous_gradient, previous_gradient); } else { // Default: Polak-Ribiere method beta = std::max(sum(gradient * (gradient - previous_gradient)) / dot_product(previous_gradient, previous_gradient), 0.0); } // beta==0 is equivalent to gradient descent (i.e. a restart) if (beta <= 0) { iteration_at_last_restart = n_iterations_; } // Compute new direction direction = beta*direction - gradient; } // Store gradient for computing beta in next iteration previous_gradient = gradient; // Distance to the nearest bound Real dir_scaling = norm2(direction); Real bound_step_size = std::numeric_limits::max(); int i_nearest_bound = -1; int i_bound_type = 0; // Work out the maximum step size along "direction" before a // bound is met... there must be a faster way to do this for (int ix = 0; ix < nx; ++ix) { if (direction(ix) > 0.0 && max_x(ix) < std::numeric_limits::max()) { Real local_bound_step_size = dir_scaling*(max_x(ix)-x(ix))/direction(ix); if (bound_step_size >= local_bound_step_size) { bound_step_size = local_bound_step_size; i_nearest_bound = ix; i_bound_type = 1; } } else if (direction(ix) < 0.0 && min_x(ix) > -std::numeric_limits::max()) { Real local_bound_step_size = dir_scaling*(min_x(ix)-x(ix))/direction(ix); if (bound_step_size >= local_bound_step_size) { bound_step_size = local_bound_step_size; i_nearest_bound = ix; i_bound_type = -1; } } } MinimizerStatus ls_status; // line-search outcome if (i_nearest_bound >= 0) { // Perform line search, storing new state vector in x ls_status = line_search(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, cg_curvature_coeff_, bound_step_size); if (ls_status == MINIMIZER_STATUS_BOUND_REACHED) { bound_status(i_nearest_bound) = i_bound_type; do_restart = true; 
ls_status = MINIMIZER_STATUS_SUCCESS; } } else { // Perform line search, storing new state vector in x ls_status = line_search(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, cg_curvature_coeff_); } if (ls_status == MINIMIZER_STATUS_SUCCESS) { // Successfully minimized along search direction: continue to // next iteration status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; } else if (iteration_at_last_restart != n_iterations_) { // Line search either made no progress or encountered a // non-finite cost function or gradient, and this was not a // restart; try restarting once do_restart = true; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; } else { // Unrecoverable failure in line-search: return status to // calling function status_ = ls_status; } // Better convergence if first step size on next line search is // larger than the actual step size on the last line search step_size *= 2.0; ++n_iterations_; if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED && n_iterations_ >= max_iterations_) { status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED; } // End of main loop: if status_ is anything other than // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations // are performed } if (state_up_to_date < ensure_updated_state_) { // The last call to calc_cost_function* was not with the state // vector returned to the user, and they want it to be. 
if (ensure_updated_state_ > 0) { // User wants at least the first derivative cost_function_ = optimizable.calc_cost_function_gradient(x, gradient); } else { // User does not need derivatives to have been computed cost_function_ = optimizable.calc_cost_function(x); } } return status_; } }; ================================================ FILE: adept/minimize_levenberg_marquardt.cpp ================================================ /* minimize_levenberg_marquardt.cpp -- Minimize function using Levenberg-Marquardt algorithm Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include #include #include namespace adept { // Minimize the cost function embodied in "optimizable" using the // Levenberg-Marquardt algorithm, where "x" is the initial state // vector and also where the solution is stored. MinimizerStatus Minimizer::minimize_levenberg_marquardt(Optimizable& optimizable, Vector x, bool use_additive_damping) { int nx = x.size(); // Initial values n_iterations_ = 0; n_samples_ = 0; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; cost_function_ = std::numeric_limits::infinity(); Real new_cost; // The main memory storage for the Levenberg family of methods // consists of the following three vectors... Vector new_x(nx); Vector gradient(nx); Vector dx(nx); // ...and the Hessian matrix, which is stored explicitly SymmMatrix hessian(nx); hessian = 0.0; Real damping = levenberg_damping_start_; gradient_norm_ = -1.0; // Original Levenberg is additive to the diagonal of the Hessian // so to make the performance insensitive to an arbitrary scaling // of the cost function, we scale the damping factor by the mean // of the diagonal of the Hessian Real diag_scaling; // Does the last calculation of the cost function in "optimizable" // match the current contents of the state vector x? 
-1=no, 0=yes, // 1=yes and the last calculation included the gradient, 2=yes and // the last calculation included gradient and Hessian. int state_up_to_date = -1; do { // At this point we have either just started or have just // reduced the cost function cost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient, hessian); diag_scaling = mean(hessian.diag_vector()); state_up_to_date = 2; ++n_samples_; if (n_iterations_ == 0) { start_cost_function_ = cost_function_; } // Check cost function and gradient are finite if (!std::isfinite(cost_function_)) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; break; } else if (any(!isfinite(gradient))) { status_ = MINIMIZER_STATUS_INVALID_GRADIENT; break; } // Compute L2 norm of gradient to see how "flat" the environment // is gradient_norm_ = norm2(gradient); // Report progress using user-defined function optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_); // Convergence has been achieved if the L2 norm has been reduced // to a user-specified threshold if (gradient_norm_ <= converged_gradient_norm_) { status_ = MINIMIZER_STATUS_SUCCESS; break; } // Try to minimize cost function Real previous_diag_scaling = 1.0; // Used in Levenberg-Marquardt version Real previous_diag_modifier = 0.0; // Used in Levenberg version while(true) { if (!use_additive_damping) { // Levenberg-Marquardt formula: scale the diagonal of the // Hessian, where the larger the value of "damping", the // closer the resulting behaviour is to steepest descent hessian.diag_vector() *= (1.0 + damping)/previous_diag_scaling; previous_diag_scaling = 1.0 + damping; } else { // Older Levenberg approach: add to the diagonal instead hessian.diag_vector() += damping*diag_scaling - previous_diag_modifier; previous_diag_modifier = damping*diag_scaling; } dx = -adept::solve(hessian, gradient); // Limit the maximum step size, if required if (max_step_size_ > 0.0) { Real max_dx = maxval(abs(dx)); if (max_dx > max_step_size_) { dx 
*= (max_step_size_/max_dx); } } // Compute new cost state vector and cost function, but not // gradient or Hessian for efficiency new_x = x+dx; new_cost = optimizable.calc_cost_function(new_x); state_up_to_date = -1; ++n_samples_; // If cost function is not finite it may be possible to // recover by trying smaller step sizes bool cost_invalid = !std::isfinite(new_cost); if (new_cost >= cost_function_ || cost_invalid) { // We haven't managed to reduce the cost function: increase // damping value to take smaller steps if (damping <= 0.0) { damping = levenberg_damping_restart_; } else if (damping < levenberg_damping_max_) { damping *= levenberg_damping_multiplier_; } else { // The damping value is now larger than the maximum so we // can get no further if (cost_invalid) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; } else { status_ = MINIMIZER_STATUS_FAILED_TO_CONVERGE; } break; } } else { // Managed to reduce cost function x = new_x; n_iterations_++; // Reduce damping for next iteration if (damping > levenberg_damping_min_) { damping /= levenberg_damping_divider_; } else { damping = 0.0; } if (n_iterations_ >= max_iterations_) { status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED; } break; } } // Inner loop } while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED); if (state_up_to_date < ensure_updated_state_) { // The last call to calc_cost_function* was not with the state // vector returned to the user, and they want it to be. Note // that the cost function and gradient norm ought to be // up-to-date already at this point. 
if (ensure_updated_state_ > 0) { // User wants at least the first derivative, but // calc_cost_function_gradient() is not guaranteed to be // present so we call the hessain function cost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient, hessian); } else { // User does not need derivatives to have been computed cost_function_ = optimizable.calc_cost_function(x); } } return status_; } // Minimize the cost function embodied in "optimizable" using the // Levenberg-Marquardt algorithm, where "x" is the initial state // vector and also where the solution is stored, subject to the // constraint that x lies between min_x and max_x. MinimizerStatus Minimizer::minimize_levenberg_marquardt_bounded(Optimizable& optimizable, Vector x, const Vector& min_x, const Vector& max_x, bool use_additive_damping) { if (any(min_x >= max_x) || min_x.size() != x.size() || max_x.size() != x.size()) { return MINIMIZER_STATUS_INVALID_BOUNDS; } int nx = x.size(); // Initial values n_iterations_ = 0; n_samples_ = 0; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; cost_function_ = std::numeric_limits::infinity(); Real new_cost; // The main memory storage for the Levenberg family of methods // consists of the following three vectors... Vector new_x(nx); Vector gradient(nx); Vector dx(nx); // ...and the Hessian matrix, which is stored explicitly SymmMatrix hessian(nx); SymmMatrix modified_hessian(nx); SymmMatrix sub_hessian; Vector sub_gradient; Vector sub_dx; hessian = 0.0; Real damping = levenberg_damping_start_; // Which state variables are at the minimum bound (-1), maximum // bound (1) or free (0)? 
intVector bound_status(nx); bound_status = 0; // Ensure that initial x lies within the specified bounds bound_status.where(x >= max_x) = 1; bound_status.where(x <= min_x) = -1; x = max(min_x, min(x, max_x)); int nbound = count(bound_status != 0); int nfree = nx - nbound; gradient_norm_ = -1.0; // Original Levenberg is additive to the diagonal of the Hessian // so to make the performance insensitive to an arbitrary scaling // of the cost function, we scale the damping factor by the mean // of the diagonal of the Hessian Real diag_scaling; // Does the last calculation of the cost function in "optimizable" // match the current contents of the state vector x? -1=no, 0=yes, // 1=yes and the last calculation included the gradient, 2=yes and // the last calculation included gradient and Hessian. int state_up_to_date = -1; do { // At this point we have either just started or have just // reduced the cost function cost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient, hessian); diag_scaling = mean(hessian.diag_vector()); state_up_to_date = 2; ++n_samples_; if (n_iterations_ == 0) { start_cost_function_ = cost_function_; } // Check cost function and gradient are finite if (!std::isfinite(cost_function_)) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; break; } else if (any(!isfinite(gradient))) { status_ = MINIMIZER_STATUS_INVALID_GRADIENT; break; } // Find which dimensions are in play if (nbound > 0) { // We release any dimensions from being at a minimum or // maximum bound if two conditions are met: (1) the gradient // in that dimension slopes away from the bound, and (2) the // Levenberg-Marquardt formula to compute dx using the current // value of "damping" leads to a point on the valid side of the // bound modified_hessian = hessian; if (!use_additive_damping) { modified_hessian.diag_vector() *= (1.0 + damping); } else { modified_hessian.diag_vector() += damping*diag_scaling; } dx = -adept::solve(modified_hessian, gradient); // Release points 
at the minimum bound bound_status.where(bound_status == -1 && gradient < 0.0 && dx > 0.0) = 0; // Release points at the maximum bound bound_status.where(bound_status == 1 && gradient > 0.0 && dx < 0.0) = 0; } nbound = count(bound_status != 0); nfree = nx - nbound; // List of indices of free state variables intVector ifree(nfree); if (nbound > 0) { ifree = find(bound_status == 0); } else { ifree = range(0, nx-1); } // Compute L2 norm of gradient to see how "flat" the environment // is, restricting ourselves to the dimensions currently in play if (nfree > 0) { gradient_norm_ = norm2(gradient(ifree)); } else { // If no dimensions are in play we are at a corner of the // bounds and the gradient is pointing into the corner: we // have reached a minimum in the cost function subject to the // bounds so have converged gradient_norm_ = 0.0; } // Report progress using user-defined function optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_); // Convergence has been achieved if the L2 norm has been reduced // to a user-specified threshold if (gradient_norm_ <= converged_gradient_norm_) { status_ = MINIMIZER_STATUS_SUCCESS; break; } sub_gradient.clear(); sub_hessian.clear(); if (nbound > 0) { sub_gradient = gradient(ifree); sub_hessian = SymmMatrix(Matrix(hessian)(ifree,ifree)); } else { sub_gradient >>= gradient; sub_hessian >>= hessian; } // FIX reuse dx if possible below... 
// Try to minimize cost function Real previous_diag_scaling = 1.0; // Used in Levenberg-Marquardt version Real previous_diag_modifier = 0.0; // Used in Levenberg version while(true) { sub_dx.resize(nfree); if (!use_additive_damping) { // Levenberg-Marquardt formula: scale the diagonal of the // Hessian, where the larger the value of "damping", the // closer the resulting behaviour is to steepest descent sub_hessian.diag_vector() *= (1.0 + damping)/previous_diag_scaling; previous_diag_scaling = 1.0 + damping; } else { // Older Levenberg approach: add to the diagonal instead sub_hessian.diag_vector() += damping*diag_scaling - previous_diag_modifier; previous_diag_modifier = damping*diag_scaling; } sub_dx = -adept::solve(sub_hessian, sub_gradient); // Limit the maximum step size, if required if (max_step_size_ > 0.0) { Real max_dx = maxval(abs(sub_dx)); if (max_dx > max_step_size_) { sub_dx *= (max_step_size_/max_dx); } } // Check for collision with new bounds intVector new_min_bounds = find(x(ifree)+sub_dx <= min_x(ifree)); intVector new_max_bounds = find(x(ifree)+sub_dx >= max_x(ifree)); Real mmin_frac = 2.0; Real mmax_frac = 2.0; int imin = 0, imax = 0; if (!new_min_bounds.empty()) { Vector min_frac = -(x(ifree(new_min_bounds)) - min_x(ifree(new_min_bounds))) / sub_dx(new_min_bounds); mmin_frac = minval(min_frac); imin = new_min_bounds(minloc(min_frac)); } if (!new_max_bounds.empty()) { Vector max_frac = (max_x(ifree(new_max_bounds)) - x(ifree(new_max_bounds))) / sub_dx(new_max_bounds); mmax_frac = minval(max_frac); imax = new_max_bounds(maxloc(max_frac)); } Real frac = 1.0; int bound_type = 0; int ibound = 0; if (mmin_frac <= 1.0 || mmax_frac <= 1.0) { if (mmin_frac < mmax_frac) { frac = mmin_frac; ibound = imin; bound_type = -1; } else { frac = mmax_frac; ibound = imax; bound_type = 1; } sub_dx *= frac; } // Compute new state vector and cost function, but not // gradient or Hessian for efficiency new_x = x; new_x(ifree) += sub_dx; new_cost = 
optimizable.calc_cost_function(new_x); state_up_to_date = -1; ++n_samples_; // If cost function is not finite it may be possible to // recover by trying smaller step sizes bool cost_invalid = !std::isfinite(new_cost); if (new_cost >= cost_function_ || cost_invalid) { // We haven't managed to reduce the cost function: increase // damping value to take smaller steps if (damping <= 0.0) { damping = levenberg_damping_restart_; } else if (damping < levenberg_damping_max_) { damping *= levenberg_damping_multiplier_; } else { // The damping value is now larger than the maximum so we // can get no further if (cost_invalid) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; } else { status_ = MINIMIZER_STATUS_FAILED_TO_CONVERGE; } break; } } else { // Managed to reduce cost function x = new_x; n_iterations_++; if (frac < 1.0) { // Found a new bound bound_status(ifree(ibound)) = bound_type; } // Reduce damping for next iteration if (damping > levenberg_damping_min_) { damping /= levenberg_damping_divider_; } else { damping = 0.0; } if (n_iterations_ >= max_iterations_) { status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED; } break; } } // Inner loop } while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED); if (state_up_to_date < ensure_updated_state_) { // The last call to calc_cost_function* was not with the state // vector returned to the user, and they want it to be. Note // that the cost function and gradient norm ought to be // up-to-date already at this point. 
if (ensure_updated_state_ > 0) { // User wants at least the first derivative, but // calc_cost_function_gradient() is not guaranteed to be // present so we call the hessain function cost_function_ = optimizable.calc_cost_function_gradient_hessian(x, gradient, hessian); } else { // User does not need derivatives to have been computed cost_function_ = optimizable.calc_cost_function(x); } } return status_; } }; ================================================ FILE: adept/minimize_limited_memory_bfgs.cpp ================================================ /* minimize_limited_memory_bfgs.cpp -- Minimize function using Limited-Memory BFGS algorithm Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include #include namespace adept { // Structure for storing data from previous iterations used by // L-BFGS minimization algorithm class LbfgsData { public: LbfgsData(int nx, int ni) : nx_(nx), ni_(ni), iteration_(0) { x_diff_.resize(ni,nx); gradient_diff_.resize(ni,nx); rho_.resize(ni); alpha_.resize(ni); gamma_.resize(ni); } // Return false if the dot product of x_diff and gradient_diff is // zero, true otherwise void store(int iter, const Vector& x_diff, const Vector& gradient_diff) { int index = (iter-1) % ni_; x_diff_[index] = x_diff; gradient_diff_[index] = gradient_diff; Real dp = dot_product(x_diff, gradient_diff); if (std::fabs(dp) > 10.0*std::numeric_limits::min()) { rho_[index] = 1.0 / dp; } else if (dp >= 0.0) { rho_[index] = 1.0 / std::max(dp, 10.0*std::numeric_limits::min()); } else { rho_[index] = 1.0 / std::min(dp, -10.0*std::numeric_limits::min()); } } // Return read-only vectors containing the differences between // state vectors and gradients at sequential iterations, by // slicing off the appropriate row of the matrix Vector x_diff(int iter) { return x_diff_[iter % ni_]; }; Vector gradient_diff(int iter) { return gradient_diff_[iter % ni_]; }; Real& alpha(int iter) { return 
alpha_[iter % ni_]; } Real rho(int iter) const { return rho_[iter % ni_]; } Real gamma(int iter) const { return gamma_[iter % ni_]; } private: // Data int nx_; // Number of state variables int ni_; // Number of iterations to store int iteration_; // Current iteration Matrix x_diff_; Matrix gradient_diff_; Vector rho_; Vector alpha_; Vector gamma_; };

  // Minimize the cost function embodied in "optimizable" using the
  // Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS)
  // algorithm, where "x" is the initial state vector and also where
  // the solution is stored.
  //
  // Returns the final value of status_: success, maximum iterations
  // reached, invalid cost function or gradient, or the status of a
  // failed line search.
  MinimizerStatus
  Minimizer::minimize_limited_memory_bfgs(Optimizable& optimizable, Vector x)
  {
    int nx = x.size();

    // Initial values
    n_iterations_ = 0;
    n_samples_ = 0;
    status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;
    cost_function_ = std::numeric_limits<Real>::infinity();

    Vector previous_x(nx);
    Vector gradient(nx);
    Vector previous_gradient(nx);
    Vector direction(nx);
    Vector test_x(nx); // Used by the line search only

    // Previous states needed by the L-BFGS algorithm
    int n_states = std::min(nx, lbfgs_n_states_);
    LbfgsData data(nx, n_states);

    // Does the last calculation of the cost function in "optimizable"
    // match the current contents of the state vector x? -1=no, 0=yes,
    // 1=yes and the last calculation included the gradient, 2=yes and
    // the last calculation included gradient and Hessian.
    int state_up_to_date = -1;

    // Initial step size
    Real step_size = 1.0;
    if (max_step_size_ > 0.0) {
      step_size = max_step_size_;
    }

    // Main loop
    while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) {

      // If the last line search found a minimum along the lines
      // satisfying the Wolfe conditions, then the current cost
      // function and gradient will be consistent with the current
      // state vector. Otherwise we need to compute them.
      if (state_up_to_date < 1) {
        cost_function_ = optimizable.calc_cost_function_gradient(x, gradient);
        state_up_to_date = 1;
        ++n_samples_;

        if (n_iterations_ == 0) {
          start_cost_function_ = cost_function_;
        }
      }

      // Check cost function and gradient are finite; this single
      // check covers both values just computed above and values left
      // behind by the previous line search
      if (!std::isfinite(cost_function_)) {
        status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION;
        break;
      }
      else if (any(!isfinite(gradient))) {
        status_ = MINIMIZER_STATUS_INVALID_GRADIENT;
        break;
      }

      // Compute L2 norm of gradient to see how "flat" the environment
      // is
      gradient_norm_ = norm2(gradient);

      // Report progress using user-defined function
      optimizable.report_progress(n_iterations_, x, cost_function_,
                                  gradient_norm_);

      // Convergence has been achieved if the L2 norm has been reduced
      // to a user-specified threshold
      if (gradient_norm_ <= converged_gradient_norm_) {
        status_ = MINIMIZER_STATUS_SUCCESS;
        break;
      }

      // Store state and gradient differences
      if (n_iterations_ > 0) {
        data.store(n_iterations_, x-previous_x, gradient-previous_gradient);
      }

      // Find search direction: see page 779 of Nocedal (1980):
      // Updating quasi-Newton matrices with limited
      // storage. Mathematics of Computation, 35, 773-782.
      direction = gradient;
      if (n_iterations_ > 0) {
        // First pass: from the most recent stored state backwards
        for (int ii = n_iterations_-1;
             ii >= std::max(0,n_iterations_-n_states);
             --ii) {
          data.alpha(ii) = data.rho(ii)
            * dot_product(data.x_diff(ii), direction);
          direction -= data.alpha(ii) * data.gradient_diff(ii);
        }

        // Scale by gamma, guarding the denominator against zero
        Real gamma = dot_product(x-previous_x, gradient-previous_gradient)
          / std::max(10.0*std::numeric_limits<Real>::min(),
                     dot_product(gradient-previous_gradient,
                                 gradient-previous_gradient));
        direction *= gamma;

        // Second pass: from the oldest stored state forwards
        for (int ii = std::max(0,n_iterations_-n_states);
             ii < n_iterations_;
             ++ii) {
          Real beta = data.rho(ii)
            * dot_product(data.gradient_diff(ii), direction);
          direction += data.x_diff(ii) * (data.alpha(ii)-beta);
        }
        direction = -direction;
      }
      else {
        // First iteration: steepest descent with a step of length
        // step_size
        direction = -gradient * (step_size / norm2(gradient));
      }

      // Store state and gradient
      previous_x = x;
      previous_gradient = gradient;

      // Perform line search, storing new state vector in x, and
      // returning MINIMIZER_STATUS_SUCCESS on success
      Real curvature_coeff = lbfgs_curvature_coeff_;
      if (n_iterations_ < n_states) {
        // In the early iterations we require the line search to be
        // more accurate since the L-BFGS update will have fewer
        // states to make a good estimate of the minimum; interpolate
        // between the Conjugate Gradient and L-BFGS curvature
        // coefficients
        curvature_coeff
          = (cg_curvature_coeff_ * (n_states-n_iterations_)
             + lbfgs_curvature_coeff_ * n_iterations_)
          / n_states;
      }

      // Direction points to the best estimate of the actual location
      // of the minimum, so the step size is the norm of the direction
      // vector
      step_size = norm2(direction);
      MinimizerStatus ls_status
        = line_search(optimizable, x, direction,
                      test_x, step_size, gradient, state_up_to_date,
                      curvature_coeff);

      if (ls_status == MINIMIZER_STATUS_SUCCESS) {
        // Successfully minimized along search direction: continue to
        // next iteration
        status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED;
      }
      else {
        // Unrecoverable failure in line-search: return status to
        // calling function
        status_ = ls_status;
      }

      ++n_iterations_;
      if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED
          && n_iterations_ >= max_iterations_) {
        status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED;
      }

      // End of main loop: if status_ is anything other than
      // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations
      // are performed
    }

    if (state_up_to_date < ensure_updated_state_) {
      // The last call to calc_cost_function* was not with the state
      // vector returned to the user, and they want it to be.
      if (ensure_updated_state_ > 0) {
        // User wants at least the first derivative
        cost_function_ = optimizable.calc_cost_function_gradient(x, gradient);
      }
      else {
        // User does not need derivatives to have been computed
        cost_function_ = optimizable.calc_cost_function(x);
      }
    }

    return status_;
  }

// Minimize the cost function embodied in "optimizable" using the // Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) // algorithm, where "x" is the initial state vector and also where // the solution is stored. MinimizerStatus Minimizer::minimize_limited_memory_bfgs_bounded(Optimizable& optimizable, Vector x, const Vector& min_x, const Vector& max_x) { if (any(min_x >= max_x) || min_x.size() != x.size() || max_x.size() != x.size()) { return MINIMIZER_STATUS_INVALID_BOUNDS; } int nx = x.size(); // Initial values n_iterations_ = 0; n_samples_ = 0; status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; cost_function_ = std::numeric_limits::infinity(); Vector previous_x(nx); Vector gradient(nx); Vector previous_gradient(nx); Vector direction(nx); Vector test_x(nx); // Used by the line search only // Previous states needed by the L-BFGS algorithm int n_states = std::min(nx, lbfgs_n_states_); LbfgsData data(nx, n_states); // Which state variables are at the minimum bound (-1), maximum // bound (1) or free (0)? 
intVector bound_status(nx); bound_status = 0; // Ensure that initial x lies within the specified bounds bound_status.where(x >= max_x) = 1; bound_status.where(x <= min_x) = -1; x = max(min_x, min(x, max_x)); int nbound = count(bound_status != 0); int nfree = nx - nbound; // Floating-point number containing 1.0 if unbound and 0.0 if // bound Vector unbound_status(nx); unbound_status = 1.0-fabs(bound_status); // If we reach a bound we need to restart the L-BFGS storage, so // store the iteration at the last restart int iteration_last_restart = 0; // Does the last calculation of the cost function in "optimizable" // match the current contents of the state vector x? -1=no, 0=yes, // 1=yes and the last calculation included the gradient, 2=yes and // the last calculation included gradient and Hessian. int state_up_to_date = -1; // Initial step size Real step_size = 1.0; if (max_step_size_ > 0.0) { step_size = max_step_size_; } // Main loop while (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED) { // If the last line search found a minimum along the lines // satisfying the Wolfe conditions, then the current cost // function and gradient will be consistent with the current // state vector. Otherwise we need to compute them. 
if (state_up_to_date < 1) { cost_function_ = optimizable.calc_cost_function_gradient(x, gradient); state_up_to_date = 1; ++n_samples_; if (n_iterations_ == 0) { start_cost_function_ = cost_function_; } // Check cost function and gradient are finite if (!std::isfinite(cost_function_)) { status_ = MINIMIZER_STATUS_INVALID_COST_FUNCTION; break; } else if (any(!isfinite(gradient))) { status_ = MINIMIZER_STATUS_INVALID_GRADIENT; break; } } // Check whether the bound status of each state variable is // consistent with the gradient if a steepest descent were to be // taken, and if not flag a restart if (any(bound_status == -1 && gradient < 0.0) || any(bound_status == 1 && gradient > 0.0)) { bound_status.where(bound_status == -1 && gradient < 0.0) = 0; bound_status.where(bound_status == 1 && gradient > 0.0) = 0; unbound_status = 1.0-fabs(bound_status); iteration_last_restart = n_iterations_; } nbound = count(bound_status != 0); nfree = nx - nbound; // Set gradient at bound points to zero gradient.where(bound_status != 0) = 0.0; // Compute L2 norm of gradient to see how "flat" the environment // is if (nfree > 0) { gradient_norm_ = norm2(gradient); } else { // If no dimensions are in play we are at a corner of the // bounds and the gradient is pointing into the corner: we // have reached a minimum in the cost function subject to the // bounds so have converged gradient_norm_ = 0.0; } // Report progress using user-defined function optimizable.report_progress(n_iterations_, x, cost_function_, gradient_norm_); // Convergence has been achieved if the L2 norm has been reduced // to a user-specified threshold if (gradient_norm_ <= converged_gradient_norm_) { status_ = MINIMIZER_STATUS_SUCCESS; break; } // Store state and gradient differences if (n_iterations_ > iteration_last_restart) { data.store(n_iterations_, x-previous_x, gradient-previous_gradient); } // Find search direction: see page 779 of Nocedal (1980): // Updating quasi-Newton matrices with limited // storage. 
Mathematics of Computation, 35, 773-782. direction = gradient; if (n_iterations_ > iteration_last_restart) { for (int ii = n_iterations_-1; ii >= std::max(iteration_last_restart,n_iterations_-n_states); --ii) { data.alpha(ii) = data.rho(ii) * dot_product(data.x_diff(ii), direction); direction -= data.alpha(ii) * data.gradient_diff(ii); } Real gamma = dot_product(x-previous_x, gradient-previous_gradient) / std::max(10.0*std::numeric_limits::min(), dot_product(gradient-previous_gradient, gradient-previous_gradient)); direction *= gamma; for (int ii = std::max(iteration_last_restart,n_iterations_-n_states); ii < n_iterations_; ++ii) { Real beta = data.rho(ii) * dot_product(data.gradient_diff(ii), direction); direction += data.x_diff(ii) * (data.alpha(ii)-beta); } direction = -direction; } else { // We are either at the first iteration or have restarted // having changed the bound dimensions: use steepest descent direction = -gradient * (step_size / norm2(gradient)); } // Store state and gradient previous_x = x; previous_gradient = gradient; // Perform line search, storing new state vector in x, and // returning MINIMIZER_STATUS_NOT_YET_CONVERGED on success Real curvature_coeff = lbfgs_curvature_coeff_; int n_stored_iterations = n_iterations_ - iteration_last_restart; if (n_stored_iterations < n_states) { // In the early iterations we require the line search to be // more accurate since the L-BFGS update will have fewer // states to make a good estimate of the minimum; interpolate // between the Conjugate Gradient and L-BFGS curvature // coefficients curvature_coeff = (cg_curvature_coeff_ * (n_states-n_stored_iterations) + lbfgs_curvature_coeff_ * n_stored_iterations) / n_states; } // Direction points to the best estimate of the actual location // of the minimum, so the step size is the norm of the direction // vector step_size = norm2(direction); // Distance to the nearest bound Real dir_scaling = step_size; Real bound_step_size = std::numeric_limits::max(); int 
i_nearest_bound = -1; int i_bound_type = 0; // Work out the maximum step size along "direction" before a // bound is met... there must be a faster way to do this for (int ix = 0; ix < nx; ++ix) { if (direction(ix) > 0.0 && max_x(ix) < std::numeric_limits::max()) { Real local_bound_step_size = dir_scaling*(max_x(ix)-x(ix))/direction(ix); if (bound_step_size >= local_bound_step_size) { bound_step_size = local_bound_step_size; i_nearest_bound = ix; i_bound_type = 1; } } else if (direction(ix) < 0.0 && min_x(ix) > -std::numeric_limits::max()) { Real local_bound_step_size = dir_scaling*(min_x(ix)-x(ix))/direction(ix); if (bound_step_size >= local_bound_step_size) { bound_step_size = local_bound_step_size; i_nearest_bound = ix; i_bound_type = -1; } } } MinimizerStatus ls_status; // line-search outcome if (i_nearest_bound >= 0) { // Perform line search, storing new state vector in x ls_status = line_search(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, curvature_coeff, bound_step_size); if (ls_status == MINIMIZER_STATUS_BOUND_REACHED) { bound_status(i_nearest_bound) = i_bound_type; // Restart the L-BFGS storage iteration_last_restart = n_iterations_+1; ls_status = MINIMIZER_STATUS_SUCCESS; } } else { // Perform line search, storing new state vector in x ls_status = line_search(optimizable, x, direction, test_x, step_size, gradient, state_up_to_date, curvature_coeff); } if (ls_status == MINIMIZER_STATUS_SUCCESS) { // Successfully minimized along search direction: continue to // next iteration status_ = MINIMIZER_STATUS_NOT_YET_CONVERGED; } else { // Unrecoverable failure in line-search: return status to // calling function status_ = ls_status; } ++n_iterations_; if (status_ == MINIMIZER_STATUS_NOT_YET_CONVERGED && n_iterations_ >= max_iterations_) { status_ = MINIMIZER_STATUS_MAX_ITERATIONS_REACHED; } // End of main loop: if status_ is anything other than // MINIMIZER_STATUS_NOT_YET_CONVERGED then no more iterations // are performed } if 
(state_up_to_date < ensure_updated_state_) { // The last call to calc_cost_function* was not with the state // vector returned to the user, and they want it to be. if (ensure_updated_state_ > 0) { // User wants at least the first derivative cost_function_ = optimizable.calc_cost_function_gradient(x, gradient); } else { // User does not need derivatives to have been computed cost_function_ = optimizable.calc_cost_function(x); } } return status_; } }; ================================================ FILE: adept/settings.cpp ================================================ /* settings.cpp -- View/change the overall Adept settings Copyright (C) 2016 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #include #include #include #include #ifdef HAVE_CONFIG_H #include "config.h" #endif #ifdef HAVE_OPENBLAS_CBLAS_HEADER #include #endif namespace adept { // ------------------------------------------------------------------- // Get compile-time settings // ------------------------------------------------------------------- // Return the version of Adept at compile time std::string version() { return ADEPT_VERSION_STR; } // Return the compiler used to compile the Adept library (e.g. "g++ // [4.3.2]" or "Microsoft Visual C++ [1800]") std::string compiler_version() { #ifdef CXX std::string cv = CXX; // Defined in config.h #elif defined(_MSC_VER) std::string cv = "Microsoft Visual C++"; #else std::string cv = "unknown"; #endif #ifdef __GNUC__ #define STRINGIFY3(A,B,C) STRINGIFY(A) "." STRINGIFY(B) "." STRINGIFY(C) #define STRINGIFY(A) #A cv += " [" STRINGIFY3(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) "]"; #undef STRINGIFY #undef STRINGIFY3 #elif defined(_MSC_VER) #define STRINGIFY1(A) STRINGIFY(A) #define STRINGIFY(A) #A cv += " [" STRINGIFY1(_MSC_VER) "]"; #undef STRINGIFY #undef STRINGIFY1 #endif return cv; } // Return the compiler flags used when compiling the Adept library // (e.g. 
"-Wall -g -O3") std::string compiler_flags() { #ifdef CXXFLAGS return CXXFLAGS; // Defined in config.h #else return "unknown"; #endif } // Return a multi-line string listing numerous aspects of the way // Adept has been configured. std::string configuration() { std::stringstream s; s << "Adept version " << adept::version() << ":\n"; s << " Compiled with " << adept::compiler_version() << "\n"; s << " Compiler flags \"" << adept::compiler_flags() << "\"\n"; #ifdef BLAS_LIBS if (std::strlen(BLAS_LIBS) > 2) { const char* blas_libs = &BLAS_LIBS[2]; s << " BLAS support from " << blas_libs << " library\n"; } else { s << " BLAS support from built-in library\n"; } #endif #ifdef HAVE_OPENBLAS_CBLAS_HEADER s << " Number of BLAS threads may be specified up to maximum of " << max_blas_threads() << "\n"; #endif s << " Jacobians processed in blocks of size " << ADEPT_MULTIPASS_SIZE << "\n"; return s.str(); } // ------------------------------------------------------------------- // Get/set number of threads for array operations // ------------------------------------------------------------------- // Get the maximum number of threads available for BLAS operations int max_blas_threads() { #ifdef HAVE_OPENBLAS_CBLAS_HEADER return openblas_get_num_threads(); #else return 1; #endif } // Set the maximum number of threads available for BLAS operations // (zero means use the maximum sensible number on the current // system), and return the number actually set. Note that OpenBLAS // uses pthreads and the Jacobian calculation uses OpenMP - this can // lead to inefficient behaviour so if you are computing Jacobians // then you may get better performance by setting the number of // array threads to one. int set_max_blas_threads(int n) { #ifdef HAVE_OPENBLAS_CBLAS_HEADER openblas_set_num_threads(n); return openblas_get_num_threads(); #else return 1; #endif } // Was the library compiled with matrix multiplication support (from // BLAS)? 
bool have_matrix_multiplication() {
    // Decided at compile time by whether HAVE_BLAS is defined
#ifdef HAVE_BLAS
    return true;
#else
    return false;
#endif
  }

  // Was the library compiled with linear algebra support (e.g. inv
  // and solve from LAPACK)
  bool have_linear_algebra() {
    // Decided at compile time by whether HAVE_LAPACK is defined
#ifdef HAVE_LAPACK
    return true;
#else
    return false;
#endif
  }

} // End namespace adept

================================================
FILE: adept/solve.cpp
================================================
/* solve.cpp -- Solve systems of linear equations using LAPACK

    Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

*/

#include
#include
#include
#include

// If AdeptSource_H is defined then we are in a header file generated
// from all the source files, so cpplapack.h will already have been
// included
#ifndef AdeptSource_H
#include "cpplapack.h"
#endif

#ifdef HAVE_LAPACK

namespace adept {

  using namespace internal;

  // -------------------------------------------------------------------
  // Solve Ax = b for general square matrix A
  // -------------------------------------------------------------------
  // The inputs are left untouched: A is copied into a column-major
  // array and b into a separate vector, and it is these copies that
  // are passed to cpplapack_gesv.  On success the copy of b is
  // returned as the solution x.  Throws matrix_ill_conditioned if
  // the solver returns a non-zero status code.
  template
  Array<1,T,false>
  solve(const Array<2,T,false>& A, const Array<1,T,false>& b) {
    Array<2,T,false> A_;
    Array<1,T,false> b_;

    // LAPACKE is more efficient with column-major input
    // (the conditional variant that would link to A rather than copy
    // it is disabled: the copy below is unconditional)
    // if (A.is_row_contiguous()) {
    A_.resize_column_major(A.dimensions());
    A_ = A;
    // }
    // else {
    //   A_.link(A);
    // }

    // Likewise b is always copied rather than linked
    // if (b_.offset(0) != 0) {
    b_ = b;
    // }
    // else {
    //   b_.link(b);
    // }

    // Pivot indices written by the solver: one per row of A
    std::vector ipiv(A_.dimension(0));

    // lapack_int status = LAPACKE_dgesv(LAPACK_COL_MAJOR, A_.dimension(0), 1,
    //                                   A_.data(), A_.offset(1), &ipiv[0],
    //                                   b_.data(), b_.dimension(0));
    // One right-hand side; A_.offset(1) is passed as the leading
    // dimension of the column-major copy
    lapack_int status = cpplapack_gesv(A_.dimension(0), 1,
                                       A_.data(), A_.offset(1), &ipiv[0],
                                       b_.data(), b_.dimension(0));

    if (status != 0) {
      std::stringstream s;
      s << "Failed to solve general system of equations: LAPACK ?gesv returned code " << status;
      throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION));
    }
    return b_;
  }

  //
------------------------------------------------------------------- // Solve AX = B for general square matrix A and rectangular matrix B // ------------------------------------------------------------------- template Array<2,T,false> solve(const Array<2,T,false>& A, const Array<2,T,false>& B) { Array<2,T,false> A_; Array<2,T,false> B_; // LAPACKE is more efficient with column-major input // if (A.is_row_contiguous()) { A_.resize_column_major(A.dimensions()); A_ = A; // } // else { // A_.link(A); // } // if (B.is_row_contiguous()) { B_.resize_column_major(B.dimensions()); B_ = B; // } // else { // B_.link(B); // } std::vector ipiv(A_.dimension(0)); // lapack_int status = LAPACKE_dgesv(LAPACK_COL_MAJOR, A_.dimension(0), B.dimension(1), // A_.data(), A_.offset(1), &ipiv[0], // B_.data(), B_.offset(1)); lapack_int status = cpplapack_gesv(A_.dimension(0), B.dimension(1), A_.data(), A_.offset(1), &ipiv[0], B_.data(), B_.offset(1)); if (status != 0) { std::stringstream s; s << "Failed to solve general system of equations for matrix RHS: LAPACK ?gesv returned code " << status; throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION)); } return B_; } // ------------------------------------------------------------------- // Solve Ax = b for symmetric square matrix A // ------------------------------------------------------------------- template Array<1,T,false> solve(const SpecialMatrix,false>& A, const Array<1,T,false>& b) { SpecialMatrix,false> A_; Array<1,T,false> b_; // Not sure why the original code copies A... 
A_.resize(A.dimension()); A_ = A; // A_.link(A); // if (b.offset(0) != 1) { b_ = b; // } // else { // b_.link(b); // } // Treat symmetric matrix as column-major char uplo; if (Orient == ROW_LOWER_COL_UPPER) { uplo = 'U'; } else { uplo = 'L'; } std::vector ipiv(A_.dimension()); // lapack_int status = LAPACKE_dsysv(LAPACK_COL_MAJOR, uplo, A_.dimension(0), 1, // A_.data(), A_.offset(), &ipiv[0], // b_.data(), b_.dimension(0)); lapack_int status = cpplapack_sysv(uplo, A_.dimension(0), 1, A_.data(), A_.offset(), &ipiv[0], b_.data(), b_.dimension(0)); if (status != 0) { // std::stringstream s; // s << "Failed to solve symmetric system of equations: LAPACK ?sysv returned code " << status; // throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION)); std::cerr << "Warning: LAPACK solve symmetric system failed (?sysv): trying general (?gesv)\n"; return solve(Array<2,T,false>(A_),b_); } return b_; } // ------------------------------------------------------------------- // Solve AX = B for symmetric square matrix A // ------------------------------------------------------------------- template Array<2,T,false> solve(const SpecialMatrix,false>& A, const Array<2,T,false>& B) { SpecialMatrix,false> A_; Array<2,T,false> B_; A_.resize(A.dimension()); A_ = A; // A_.link(A); // if (B.is_row_contiguous()) { B_.resize_column_major(B.dimensions()); B_ = B; // } // else { // B_.link(B); // } // Treat symmetric matrix as column-major char uplo; if (Orient == ROW_LOWER_COL_UPPER) { uplo = 'U'; } else { uplo = 'L'; } std::vector ipiv(A_.dimension()); // lapack_int status = LAPACKE_dsysv(LAPACK_COL_MAJOR, uplo, A_.dimension(0), B.dimension(1), // A_.data(), A_.offset(), &ipiv[0], // B_.data(), B_.offset(1)); lapack_int status = cpplapack_sysv(uplo, A_.dimension(0), B.dimension(1), A_.data(), A_.offset(), &ipiv[0], B_.data(), B_.offset(1)); if (status != 0) { std::stringstream s; s << "Failed to solve symmetric system of equations with matrix RHS: LAPACK ?sysv returned code " << 
status; throw(matrix_ill_conditioned(s.str() ADEPT_EXCEPTION_LOCATION)); } return B_; } } #else namespace adept { using namespace internal; // ------------------------------------------------------------------- // Solve Ax = b for general square matrix A // ------------------------------------------------------------------- template Array<1,T,false> solve(const Array<2,T,false>& A, const Array<1,T,false>& b) { throw feature_not_available("Cannot solve linear equations because compiled without LAPACK"); } // ------------------------------------------------------------------- // Solve AX = B for general square matrix A and rectangular matrix B // ------------------------------------------------------------------- template Array<2,T,false> solve(const Array<2,T,false>& A, const Array<2,T,false>& B) { throw feature_not_available("Cannot solve linear equations because compiled without LAPACK"); } // ------------------------------------------------------------------- // Solve Ax = b for symmetric square matrix A // ------------------------------------------------------------------- template Array<1,T,false> solve(const SpecialMatrix,false>& A, const Array<1,T,false>& b) { throw feature_not_available("Cannot solve linear equations because compiled without LAPACK"); } // ------------------------------------------------------------------- // Solve AX = B for symmetric square matrix A // ------------------------------------------------------------------- template Array<2,T,false> solve(const SpecialMatrix,false>& A, const Array<2,T,false>& B) { throw feature_not_available("Cannot solve linear equations because compiled without LAPACK"); } } #endif namespace adept { // ------------------------------------------------------------------- // Explicit instantiations // ------------------------------------------------------------------- #define ADEPT_EXPLICIT_SOLVE(TYPE,RRANK) \ template Array \ solve(const Array<2,TYPE,false>& A, const Array& b); \ template Array \ solve(const 
SpecialMatrix,false>& A, \
        const Array& b); \
  template Array \
  solve(const SpecialMatrix,false>& A, \
        const Array& b);

  ADEPT_EXPLICIT_SOLVE(float,1)
  ADEPT_EXPLICIT_SOLVE(float,2)
  ADEPT_EXPLICIT_SOLVE(double,1)
  ADEPT_EXPLICIT_SOLVE(double,2)
#undef ADEPT_EXPLICIT_SOLVE

}

================================================
FILE: adept/vector_utilities.cpp
================================================
/* vector_utilities.cpp -- Vector utility functions

    Copyright (C) 2016 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

*/

#include

namespace adept {

  // Return a vector of n values evenly spaced between x1 and x2
  // inclusive.
  //   n > 1:  element i is x1 + (x2-x1)*i/(n-1), so the first element
  //           is x1 and the spacing is (x2-x1)/(n-1)
  //   n == 1: only meaningful if x1 == x2, in which case the single
  //           element is x1; otherwise invalid_operation is thrown
  //   n < 1:  the vector constructed with size n is returned without
  //           any element being assigned
  Array<1,Real,false> linspace(Real x1, Real x2, Index n) {
    Array<1,Real,false> ans(n);
    if (n > 1) {
      for (Index i = 0; i < n; ++i) {
        // Divide by a floating-point (n-1) so that the quotient is
        // not computed with integer arithmetic
        ans(i) = x1 + (x2-x1)*i / static_cast(n-1);
      }
    }
    else if (n == 1 && x1 == x2) {
      // A single point is unambiguous only if both ends coincide
      ans(0) = x1;
      return ans;
    }
    else if (n == 1) {
      throw(invalid_operation("linspace(x1,x2,n) with n=1 only valid if x1=x2"));
    }
    return ans;
  }

}

================================================
FILE: benchmark/Makefile.am
================================================
check_PROGRAMS = autodiff_benchmark animate matrix_benchmark math_benchmark

autodiff_benchmark_SOURCES = autodiff_benchmark.cpp \
        differentiator.h advection_schemes.h \
        advection_schemes_AD.h advection_schemes_K.h nx.h
autodiff_benchmark_CPPFLAGS = -I@top_srcdir@/include
autodiff_benchmark_LDFLAGS = -static -no-install -L@top_srcdir@/adept/.libs
autodiff_benchmark_LDADD = -ladept

animate_SOURCES = animate.cpp
animate_CPPFLAGS = -I@top_srcdir@/include

matrix_benchmark_SOURCES = matrix_benchmark.cpp
matrix_benchmark_CPPFLAGS = -I@top_srcdir@/include
matrix_benchmark_LDFLAGS = -static -no-install -L@top_srcdir@/adept/.libs
matrix_benchmark_LDADD = -ladept

math_benchmark_SOURCES = math_benchmark.cpp
math_benchmark_CPPFLAGS = -I@top_srcdir@/include
math_benchmark_LDFLAGS = -static -no-install -L@top_srcdir@/adept/.libs
math_benchmark_LDADD = -ladept

================================================
FILE: benchmark/advection_schemes.h ================================================ /* advection_schemes.h - Two test advection algorithms from the Adept paper Copyright (C) 2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ // Use templates so that these functions can be easily compiled with // different automatic differentiation tools in order that the // performance of these tools can be compared. #ifndef ADVECTION_SCHEMES_H #define ADVECTION_SCHEMES_H 1 #include // Use a fixed problem size #include "nx.h" // Lax-Wendroff scheme applied to linear advection template void lax_wendroff(int nt, Real c, const aReal q_init[NX], aReal q[NX]) { aReal flux[NX-1]; // Fluxes between boxes for (int i=0; i void toon(int nt, Real c, const aReal q_init[NX], aReal q[NX]) { aReal flux[NX-1]; // Fluxes between boxes for (int i=0; i q[i] || bigdiff < -q[i]) { flux[i] = (exp(c*log(q[i]/q[i+1]))-1.0) * q[i]*q[i+1] / (q[i]-q[i+1]); // } // else { // flux[i] = c*q[i]; // Upwind scheme // } } for (int i=1; i struct is_active { static const bool value = false; }; template <> struct is_active { static const bool value = true; }; // Lax-Wendroff scheme applied to linear advection template void lax_wendroff_vector(int nt, Real c, const aReal q_init[NX], aReal q[NX]) { using namespace adept; typedef adept::Array<1,Real,::is_active::value> my_vector; // typedef adept::Array<1,Real,true> my_vector; my_vector Q(NX); my_vector F(NX-1); my_vector Qleft = Q(range(0,end-1)); my_vector Qright = Q(range(1,end)); my_vector Qcentre = Q(range(1,end-1)); my_vector Fleft = F(range(0,end-1)); my_vector Fright = F(range(1,end)); for (int i=0; i void toon_vector(int nt, Real c, const aReal q_init[NX], aReal q[NX]) { using namespace adept; typedef adept::Array<1,Real,::is_active::value> my_vector; 
my_vector Q(NX); my_vector F(NX-1); my_vector Qleft = Q(range(0,end-1)); my_vector Qright = Q(range(1,end)); my_vector Qcentre = Q(range(1,end-1)); my_vector Fleft = F(range(0,end-1)); my_vector Fright = F(range(1,end)); for (int i=0; i void lax_wendroff_AD(int nt, real c, const real q_init[NX], real q[NX], const real q_AD_const[NX], real q_init_AD[NX]) { // Forward pass real flux[NX-1]; for (int i = 0; i < NX; i++) q[i] = q_init[i]; // Forward pass for (int j = 0; j < nt; j++) { for (int i = 0; i < NX-1; i++) flux[i] = 0.5*c*(q[i]+q[i+1]+c*(q[i]-q[i+1])); for (int i = 1; i < NX-1; i++) q[i] += flux[i-1]-flux[i]; q[0] = q[NX-2]; q[NX-1] = q[1]; // Treat boundary conditions } real q_AD[NX]; real flux_AD[NX-1]; for (int i = 0; i < NX; i++) q_AD[i] = q_AD_const[i]; for (int i = 0; i < NX-1; i++) flux_AD[i] = 0.0; // Reverse pass for (int j = nt-1; j >= 0; j--) { q_AD[NX-2] += q_AD[0]; q_AD[0] = 0.0; q_AD[1] += q_AD[NX-1]; q_AD[NX-1] = 0.0; for(int i = 1; i < NX-1; i++) { flux_AD[i-1] += q_AD[i]; flux_AD[i] -= q_AD[i]; // q_AD[i] = 0.0; } real factor1 = 0.5*c*(1.0+c); real factor2 = 0.5*c*(1.0-c); for (int i = 0; i < NX-1; i++) { q_AD[i] += factor1*flux_AD[i]; q_AD[i+1] += factor2*flux_AD[i]; flux_AD[i] = 0.0; } } for (int i = 0; i < NX; i++) { q_init_AD[i] = q_AD[i]; q_AD[i] = 0.0; } } // Hand-coded adjoint of Toon advection scheme template void toon_AD(int nt, real c, const real q_init[NX], real q_out[NX], const real q_AD_const[NX], real q_init_AD[NX]) { // Forward pass real flux[NX-1]; real* q_save = new real[NX*(nt+1)]; // real q_save[NX*(nt+1)]; real* q = &(q_save[0]); for (int i = 0; i < NX; i++) q[i] = q_init[i]; // Forward pass for (int j = 0; j < nt; j++) { for (int i=0; i= 0; j--) { q_AD[NX-2] += q_AD[0]; q_AD[0] = 0.0; q_AD[1] += q_AD[NX-1]; q_AD[NX-1] = 0.0; for(int i = 1; i < NX-1; i++) { flux_AD[i-1] += q_AD[i]; flux_AD[i] -= q_AD[i]; // q_AD[i] = 0.0; } q -= NX; for (int i = 0; i < NX-1; i++) { real factor = exp(c*log(q[i]/q[i+1])); real one_over_q_i = 
1.0/q[i]; real one_over_q_i_plus_one = 1.0/q[i+1]; // Up to and including Adept 2.0.5 this was the incorrect line: // real one_over_denominator = 1.0/(one_over_q_i+one_over_q_i_plus_one); // This is the corrected line: real one_over_denominator = 1.0/(one_over_q_i_plus_one-one_over_q_i); q_AD[i] += one_over_denominator*one_over_q_i * (c*factor - (factor-1.0)*one_over_denominator*one_over_q_i) * flux_AD[i]; q_AD[i+1] += one_over_denominator*one_over_q_i_plus_one * (- c*factor + (factor-1.0)*one_over_denominator*one_over_q_i_plus_one) * flux_AD[i]; flux_AD[i] = 0.0; } } for (int i = 0; i < NX; i++) { q_init_AD[i] = q_AD[i]; q_AD[i] = 0.0; } delete[] q_save; } #endif ================================================ FILE: benchmark/advection_schemes_K.h ================================================ /* advection_schemes_K.h - Header for hand-coded Jacobians Copyright (C) 2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #ifndef ADVECTION_SCHEMES_K_H #define ADVECTION_SCHEMES_K_H #include #include #include "nx.h" // Lax-Wendroff scheme applied to linear advection template void lax_wendroff_K(int nt, real c, const real q_init[NX], real q[NX], real jacobian[NX*NX]) { real flux[NX-1]; // Fluxes between boxes real flux_K[NX-1][NX]; // Flux Jacobian (dflux/dq_init) // real (&q_K)[NX][NX] = *reinterpret_cast(jacobian); real q_K[NX][NX]; real coeff1 = 0.5*c*(1.0+c); real coeff2 = 0.5*c*(1.0-c); for (int i=0; i void toon_K(int nt, real c, const real q_init[NX], real q[NX], real jacobian[NX*NX]) { real flux[NX-1]; // Fluxes between boxes real flux_K[NX-1][NX]; real q_K[NX][NX]; for (int i=0; i q[i]*1.0e-6) { real factor = exp(c*log(q[i]/q[i+1])); real one_over_denominator = 1.0/(q[i]-q[i+1]); coeff1 = one_over_denominator*q[i+1] * (c*factor + (factor-1.0)*(1.0-q[i]*one_over_denominator)); coeff2 = one_over_denominator*q[i] * (- c*factor + (factor-1.0)*(1.0+q[i+1]*one_over_denominator)); flux[i] = (factor-1.0) * q[i]*q[i+1]*one_over_denominator; /* } else { flux[i] = c*q[i]; // Upwind scheme coeff1 = c; coeff2 = 0.0; } */ for (int k=0; k #include #include #include "advection_schemes.h" int main(int argc, char** argv) { double q1_save[NX]; double q2_save[NX]; double* q1 = q1_save; double* q2 = q2_save; double pi = 4.0*atan(1.0); double min_q = -0.2; double max_q = 1.2; double dq = 0.05; double dt = 0.125; int nt = 8; int cycles = 5; int j_min = min_q/dq; int j_max = max_q/dq; std::string line; line.resize(NX); timespec t; t.tv_sec = 0; t.tv_nsec = 20000000; for (int i = 0; i < NX; i++) q1[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+0.0001; for (int k = 0; k < cycles*NX/(nt*dt); k++) { std::cout << "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"; for (int j = j_max; j > 0; j--) { double q_thresh = j*dq; for (int i = 0; i < NX; i++) { if (q1[i] > q_thresh) { line[i] = '#'; } else { line[i] = ' '; } } std::cout << line << "\n"; } for (int i = 0; i < NX; i++) { line[i] = '-'; } std::cout << line << 
"\n"; for (int j = -1; j > j_min; j--) { double q_thresh = j*dq; for (int i = 0; i < NX; i++) { if (q1[i] <= q_thresh) { line[i] = '$'; } else { line[i] = ' '; } } std::cout << line << "\n"; std::cout.flush(); } nanosleep(&t, 0); //toon(nt, dt, q1, q2); lax_wendroff(nt, dt, q1, q2); double* tmp = q1; q2 = q1; q1 = tmp; } return 0; } ================================================ FILE: benchmark/autodiff_benchmark.cpp ================================================ /* autodiff_benchmark.cpp - Program to benchmark different automatic differentiation tools Copyright (C) 2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #include #include #include #include #include #include "differentiator.h" #include using adept::Real; static Real rms(const std::vector& a, const std::vector&b) { if (a.size() != b.size()) { throw differentiator_exception("Attempt to compute RMS difference between vectors of different size"); } Real sum = 0.0; for (size_t i = 0; i < a.size(); i++) { sum += (a[i]-b[i])*(a[i]-b[i]); } return sqrt(sum/a.size()); } static void usage(const char* argv0) { std::cout << "Usage: " << argv0 << " [OPTIONS] where OPTIONS can be\n"; std::cout << " -h|--help Print this message\n"; std::cout << " -a|--algorithm s Use test algorithms specified by string s which may be\n"; std::cout << " \"all\" or a comma separated list with possible entries\n"; std::cout << " " << test_algorithms() << "\n"; std::cout << " -t|--tool s Use automatic differentiation tools specified by string\n"; std::cout << " s which may be \"all\" or a comma separated list with\n"; std::cout << " possible entries " << autodiff_tools() << "\n"; std::cout << " -r|--repeat n Benchmark repeats the simulation n times\n"; std::cout << " -j|--jrepeat n Repeat the Jacobian simulation n times\n"; 
std::cout << " -n|--timesteps n Simulation uses n timesteps\n"; std::cout << " --print-result Print the final output from the simulation(s)\n"; std::cout << " --print-adjoint Print the hand-coded adjoint\n"; std::cout << " --print-jacobian Print the hand-coded Jacobian matrix\n"; std::cout << " --no-openmp Don't use OpenMP to speed up Adept\n"; std::cout << " --jacobian-forward Force use of forward-mode Jacobian\n"; std::cout << " --jacobian-reverse Force use of reverse-mode Jacobian\n"; std::cout << " --tolerance x Agreement with hand-coded requires RMS difference < x\n"; std::cout << " --verify-only No benchmark: only verify correctness of results\n"; std::cout << "Return code: 0 if all automatic differentiation tools produce adjoints and\n" " Jacobians whose RMS difference with the values from hand-coded\n" " differentiation is less than the required tolerance; 1 otherwise.\n"; } int main(int argc, char** argv) { int nt = 2000; int nr = 100; int nr_jacobian = nr/10; Real dt = 0.125; Real tolerance = 1.0e-5; int force_jacobian = 0; bool verbose = false; bool print_result = false; bool print_adjoint = false; bool print_jacobian = false; bool no_openmp = false; bool verify_only = false; std::valarray use_tool(N_AUTODIFF_TOOLS); std::valarray use_algorithm(N_TEST_ALGORITHMS); use_tool = true; use_algorithm = true; int iarg = 1; while (iarg < argc) { if (std::string("-h") == argv[iarg] || std::string("--help") == argv[iarg]) { usage(argv[0]); return 0; } if (std::string("-v") == argv[iarg] || std::string("--verbose") == argv[iarg]) { verbose = true; } else if (std::string("--print-result") == argv[iarg]) { print_result = true; } else if (std::string("--print-adjoint") == argv[iarg]) { print_adjoint = true; } else if (std::string("--print-jacobian") == argv[iarg]) { print_jacobian = true; } else if (std::string("--jacobian-forward") == argv[iarg]) { force_jacobian = +1; } else if (std::string("--jacobian-reverse") == argv[iarg]) { force_jacobian = -1; } else if 
(std::string("--no-openmp") == argv[iarg]) { no_openmp = true; } else if (std::string("--verify-only") == argv[iarg]) { verify_only = true; } else if (std::string("-a") == argv[iarg] || std::string("--algorithm") == argv[iarg]) { if (++iarg < argc) { if (std::string(argv[iarg]) != "all") { use_algorithm = false; std::istringstream ss(argv[iarg]); std::string alg; while (std::getline(ss, alg, ',')) { bool found = false; for (int i = 0; i < N_TEST_ALGORITHMS; i++) { if (alg == test_algorithm_string[i]) { use_algorithm[i] = true; found = true; break; } } if (!found) { std::cout << "Test algorithm \"" << alg << "\" not available; available algorithms are " << test_algorithms() << "\n"; } } } } else { std::cout << "Arguments \"-a\" or \"--algorithm\" need to be followed by a string containing a comma-separated list of algorithms\n"; return 1; } } else if (std::string("-t") == argv[iarg] || std::string("--tool") == argv[iarg]) { if (++iarg < argc) { if (std::string(argv[iarg]) != "all") { use_tool = false; std::istringstream ss(argv[iarg]); std::string tool; while (std::getline(ss, tool, ',')) { bool found = false; for (int i = 0; i < N_AUTODIFF_TOOLS; i++) { if (tool == autodiff_tool_string[i]) { use_tool[i] = true; found = true; break; } } if (!found) { std::cout << "Automatic differentiation tool \"" << tool << "\" not available; available tools are " << autodiff_tools() << "\n"; } } } } else { std::cout << "Arguments \"-a\" or \"--algorithm\" need to be followed by a string containing a comma-separated list of algorithms\n"; return 1; } } else if (std::string("-r") == argv[iarg] || std::string("--repeat") == argv[iarg]) { if (++iarg < argc) { std::stringstream ss(argv[iarg]); if (ss >> nr) { if (nr <= 0) { std::cout << "Number of repeats must be greater than zero\n"; return 1; } } else { std::cout << "Failed to read \"" << argv[iarg] << "\"as an integer\n"; return 1; } } else { throw differentiator_exception("Arguments \"-r\" or \"--repeat\" need to be followed by a 
number"); } } else if (std::string("-j") == argv[iarg] || std::string("--jrepeat") == argv[iarg]) { if (++iarg < argc) { std::stringstream ss(argv[iarg]); if (ss >> nr_jacobian) { if (nr <= 0) { throw differentiator_exception("Number of repeats must be greater than zero"); } } else { std::string msg = "Failed to read \""; msg += argv[iarg]; msg += "\"as an integer"; throw differentiator_exception(msg.c_str()); } } else { throw differentiator_exception("Arguments \"-j\" or \"--jrepeat\" need to be followed by a number"); } } else if (std::string("-n") == argv[iarg] || std::string("--timesteps") == argv[iarg]) { if (++iarg < argc) { std::stringstream ss(argv[iarg]); if (ss >> nt) { if (nt < 0) { throw differentiator_exception("Number of timesteps must be greater than or equal to zero"); } } else { std::string msg = "Failed to read \""; msg += argv[iarg]; msg += "\"as an integer"; throw differentiator_exception(msg.c_str()); } } else { throw differentiator_exception("Arguments \"-n\" or \"--timesteps\" need to be followed by a number"); } } else if (std::string("--tolerance") == argv[iarg]) { if (++iarg < argc) { std::stringstream ss(argv[iarg]); if (ss >> tolerance) { if (tolerance < 0) { throw differentiator_exception("Tolerance must be greater than or equal to zero"); } } else { std::string msg = "Failed to read \""; msg += argv[iarg]; msg += "\"as a Real"; throw differentiator_exception(msg.c_str()); } } else { throw differentiator_exception("Arguments \"-j\" or \"--jrepeat\" need to be followed by a number"); } } else { std::string msg = "Argument \""; msg += argv[iarg]; msg += "\" not understood\n"; std::cout << msg; usage(argv[0]); return 1; } iarg++; } Real pi = 4.0*atan(1.0); std::vector q_init(NX); std::vector q(NX); std::vector q_AD(NX); std::vector q_init_AD(NX); std::vector q_init_AD_reference(NX); std::vector jac(NX*NX); std::vector jac_reference(NX*NX); int nr_warm_up = nr/10; int nr_jacobian_warm_up = nr_jacobian/10; if (nr_warm_up < 1) { nr_warm_up = 
1; } if (nr_jacobian_warm_up < 1) { nr_jacobian_warm_up = 1; } if (verify_only) { nr = 0; nr_jacobian = 0; nr_warm_up = 1; nr_jacobian_warm_up = 1; } for (int i = 0; i < NX; i++) q_init[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+1; for (int i = 0; i < NX; i++) q_AD[i] = 0.1; bool verify_error = false; Timer timer; std::cout << "Automatic differentiation benchmark and verification\n"; std::cout << " Automatic differentiation tools = "; bool is_first = true; for (int i = 0; i < N_AUTODIFF_TOOLS; i++) { if (use_tool[i]) { if (!is_first) { std::cout << ", "; } else { is_first = false; } std::cout << autodiff_tool_long_string[i]; } } std::cout << "\n"; std::cout << " Test algorithms = "; is_first = true; for (int i = 0; i < N_TEST_ALGORITHMS; i++) { if (use_algorithm[i]) { if (!is_first) { std::cout << ", "; } else { is_first = false; } std::cout << test_algorithm_long_string[i]; } } std::cout << "\n"; std::cout << " Number of x points = " << NX << "\n"; std::cout << " Number of timesteps = " << nt << ", Courant number = " << dt << "\n"; if (!verify_only) { std::cout << " Algorithm repeats = " << nr << ", warm-up repeats = " << nr_warm_up << "\n"; std::cout << " Jacobian repeats = " << nr_jacobian << ", warm-up repeats = " << nr_jacobian_warm_up << "\n"; } else { std::cout << " Verifying results only: no repeats\n"; } std::cout << adept::configuration(); // Loop through test algorithms for (int ialg = 0; ialg < N_TEST_ALGORITHMS; ialg++) { if (use_algorithm[ialg]) { std::string algorithm_string = test_algorithm_long_string[ialg]; std::cout << "\nRunning test algorithm \"" << algorithm_string << "\":\n"; TestAlgorithm ta = static_cast(ialg); std::cout << " Hand coded (forward-mode Jacobian only)\n"; HandCodedDifferentiator hand_coded_differentiator(timer, algorithm_string); hand_coded_differentiator.initialize(nt, dt); for (int i = 0; i < nr_warm_up; i++) { hand_coded_differentiator.func(ta, q_init, q); hand_coded_differentiator.adjoint(ta, q_init, q, q_AD, 
q_init_AD_reference); hand_coded_differentiator.jacobian(ta, q_init, q, jac_reference); } hand_coded_differentiator.reset_timings(); for (int i = 0; i < nr; i++) { hand_coded_differentiator.func(ta, q_init, q); hand_coded_differentiator.adjoint(ta, q_init, q, q_AD, q_init_AD_reference); hand_coded_differentiator.jacobian(ta, q_init, q, jac_reference); } if (print_result) { std::cout << " result = [" << q[0]; for (int i = 1; i < NX; i++) { std::cout << ", " << q[i]; } std::cout << "]\n"; } if (print_adjoint) { std::cout << "adjoint = [" << q_init_AD_reference[0]; for (int i = 1; i < NX; i++) { std::cout << ", " << q_init_AD_reference[i]; } std::cout << "]\n"; } if (print_jacobian) { Real (&q_K)[NX][NX] = *reinterpret_cast(&jac_reference[0]); std::cout << "jacobian = [\n"; for (int i = 0; i < NX; i++) { std::cout << q_K[i][0]; for (int j = 1; j < NX; j++) { std::cout << ", " << q_K[i][j]; } std::cout << "\n"; } std::cout << "]\n"; } Real base_time = timer.timing(hand_coded_differentiator.base_timer_id()); if (!verify_only) { std::cout << " Time of original algorithm: " << base_time << " seconds\n"; std::cout << " Absolute time of adjoint: " << timer.timing(hand_coded_differentiator.adjoint_compute_timer_id()) << " s\n"; std::cout << " Relative time of adjoint: " << timer.timing(hand_coded_differentiator.adjoint_compute_timer_id()) / base_time << "\n"; std::cout << " Absolute time of Jacobian: " << timer.timing(hand_coded_differentiator.jacobian_timer_id()) << " s\n"; std::cout << " Relative time of Jacobian: " << timer.timing(hand_coded_differentiator.jacobian_timer_id()) / base_time << "\n"; } for (int itool = 0; itool < N_AUTODIFF_TOOLS; itool++) { if (use_tool[itool]) { Differentiator* differentiator = new_differentiator(static_cast(itool), timer, algorithm_string); if (!differentiator) { if (verbose) std::cout << "Automatic differentiation tool with code " << itool << " not available\n"; continue; } differentiator->initialize(nt, dt); if (no_openmp) { 
differentiator->no_openmp(); } std::cout << " " << differentiator->name() << "\n"; if (test_algorithm_is_vector[ialg] && !differentiator->supports_vector_calls()) { std::cout << " ...vector calls not supported\n"; delete differentiator; continue; } for (int i = 0; i < nr_warm_up; i++) { differentiator->adjoint(ta, q_init, q, q_AD, q_init_AD); } Real rms_verify = rms(q_init_AD, q_init_AD_reference); if (rms_verify > tolerance) { std::cout << " *** Adjoint RMS difference with hand-coded of " << rms_verify << " is greater than tolerance of " << tolerance << " ***\n"; if (print_adjoint) { std::cout << "adjoint_auto = [" << q_init_AD[0]; for (int i = 1; i < NX; i++) { std::cout << ", " << q_init_AD[i]; } std::cout << "]\n"; } verify_error = true; } else { std::cout << " Adjoint RMS difference with hand-coded of " << rms_verify << " is within tolerance of " << tolerance << "\n"; } for (int i = 0; i < nr_jacobian_warm_up; i++) { differentiator->jacobian(ta, q_init, q, jac, force_jacobian); } rms_verify = rms(jac, jac_reference); if (rms_verify > tolerance) { std::cout << " *** Jacobian RMS difference with hand-coded of " << rms_verify << " is greater than tolerance of " << tolerance << " ***\n"; verify_error = true; } else { std::cout << " Jacobian RMS difference with hand-coded of " << rms_verify << " is within tolerance of " << tolerance << "\n"; } if (!verify_only) { differentiator->reset_timings(); for (int i = 0; i < nr; i++) { differentiator->adjoint(ta, q_init, q, q_AD, q_init_AD); } Real relative_record_time = timer.timing(differentiator->base_timer_id()) / base_time; Real relative_adjoint_time = timer.timing(differentiator->adjoint_compute_timer_id()) / base_time; Real relative_adjoint_prep_time = timer.timing(differentiator->adjoint_prep_timer_id()) / base_time; std::cout << " Absolute time of adjoint: " << timer.timing(differentiator->base_timer_id()) + timer.timing(differentiator->adjoint_compute_timer_id()) + 
timer.timing(differentiator->adjoint_prep_timer_id()) << " s (" << timer.timing(differentiator->base_timer_id()) << " s + "; if (relative_adjoint_prep_time > 0.0) { std::cout << timer.timing(differentiator->adjoint_prep_timer_id()) << " s + "; } std::cout << timer.timing(differentiator->adjoint_compute_timer_id()) << " s)\n"; std::cout << " Relative time of adjoint: " << relative_record_time + relative_adjoint_prep_time + relative_adjoint_time << " (" << relative_record_time << " + "; if (relative_adjoint_prep_time > 0.0) { std::cout << relative_adjoint_prep_time << " + "; } std::cout << relative_adjoint_time << ")\n"; differentiator->reset_timings(); } for (int i = 0; i < nr_jacobian; i++) { differentiator->jacobian(ta, q_init, q, jac, force_jacobian); } if (print_jacobian) { Real (&q_K)[NX][NX] = *reinterpret_cast(&jac[0]); std::cout << "jacobian_auto = [\n"; for (int i = 0; i < NX; i++) { std::cout << q_K[i][0]; for (int j = 1; j < NX; j++) { std::cout << ", " << q_K[i][j]; } std::cout << "\n"; } std::cout << "]\n"; } if (!verify_only) { Real relative_record_time = (nr*timer.timing(differentiator->base_timer_id())) /(nr_jacobian*base_time); Real relative_jacobian_time = (nr*timer.timing(differentiator->jacobian_timer_id())) /(nr_jacobian*base_time); Real relative_adjoint_prep_time = (nr*timer.timing(differentiator->adjoint_prep_timer_id())) /(nr_jacobian*base_time); std::cout << " Absolute time of Jacobian: " << timer.timing(differentiator->base_timer_id()) + timer.timing(differentiator->adjoint_prep_timer_id()) + timer.timing(differentiator->jacobian_timer_id()) << " s (" << timer.timing(differentiator->base_timer_id()) << " s + "; if (relative_adjoint_prep_time > 0.0) { std::cout << timer.timing(differentiator->adjoint_prep_timer_id()) << " s + "; } std::cout << timer.timing(differentiator->jacobian_timer_id()) << " s)\n"; std::cout << " Relative time of Jacobian: " << relative_record_time + relative_adjoint_prep_time + relative_jacobian_time << " (" << 
relative_record_time << " + ";
            if (relative_adjoint_prep_time > 0.0) {
              std::cout << relative_adjoint_prep_time << " + ";
            }
            std::cout << relative_jacobian_time << ")\n";
          }
          differentiator->print();
          delete differentiator;
        }
      }
    }
  }

  // Report the overall outcome through the exit status: 1 if any
  // comparison against the hand-coded result exceeded the tolerance,
  // 0 if every comparison passed (so that scripts and test harnesses
  // can distinguish the two cases)
  if (verify_error) {
    std::cout << "\nEXITING WITH ERROR CODE 1: ONE OR MORE OF THE AUTOMATIC DIFFERENTIATION\n"
              << "TOOLS DID NOT REPRODUCE THE HAND-CODING RESULT\n";
    return 1;
  }
  else {
    std::cout << "\nAll tests were passed within tolerance\n";
    return 0;
  }
}
================================================ FILE: benchmark/differentiator.h ================================================ /* differentiator.h Copyright (C) 2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include "Timer.h" #include "adept.h" using adept::Real; #ifdef HAVE_ADOLC // Note that ADOL-C places the "adouble" type in the global namespace #include "adolc/adolc.h" #endif #ifdef HAVE_CPPAD #include "cppad/cppad.hpp" #endif #ifdef HAVE_SACADO #include "Sacado.hpp" #endif #include "advection_schemes.h" #include "advection_schemes_AD.h" #include "advection_schemes_K.h" enum TestAlgorithm { TEST_ALGORITHM_LAX_WENDROFF = 0, TEST_ALGORITHM_TOON = 1, TEST_ALGORITHM_LAX_WENDROFF_VECTOR = 2, TEST_ALGORITHM_TOON_VECTOR = 3, N_TEST_ALGORITHMS }; const char* test_algorithm_long_string[] = {"Lax-Wendroff", "Toon et al.", "Lax-Wendroff vector", "Toon et al. 
vector"}; const char* test_algorithm_string[] = {"lw","toon","lw_vector", "toon_vector"}; const bool test_algorithm_is_vector[] = {false, false, true, true}; inline std::string test_algorithms() { std::string algs = test_algorithm_string[0]; for (int i = 1; i < N_TEST_ALGORITHMS; i++) { algs += ","; algs += test_algorithm_string[i]; } return algs; } class differentiator_exception : public std::exception { public: differentiator_exception(const char* message = "An error occurred in differentiator.h") { message_ = message; } virtual const char* what() const throw() { return message_; } protected: const char* message_; }; // Base class from which specialist differentiators (hand-coded, // Adept, ADOL-C etc) inherit class Differentiator { public: Differentiator(Timer& timer) : timer_(timer) { initialize(2000, 0.125); } virtual ~Differentiator() { } virtual void print() { } void initialize(int nt, Real c) { nt_ = nt; c_ = c; } virtual bool supports_vector_calls() { return false; } // Call the function to be differentiated, with the active type // provided as a template argument template void func(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y) { timer_.start(base_timer_id_); if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) { lax_wendroff(nt_, c_, &x[0], &y[0]); } else if (test_algorithm == TEST_ALGORITHM_TOON) { toon(nt_, c_, &x[0], &y[0]); } timer_.stop(); } virtual bool adjoint(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, const std::vector& y_AD, std::vector& x_AD) { return false; } virtual bool jacobian(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, std::vector& jac, int force_jacobian = 0) { return false; } void reset_timings() { timer_.reset(base_timer_id_); timer_.reset(adjoint_prep_timer_id_); timer_.reset(adjoint_compute_timer_id_); timer_.reset(jacobian_timer_id_); } virtual std::string name() const = 0; //{ return "GENERIC"; } virtual void no_openmp() { } int base_timer_id() const { return 
base_timer_id_; } int adjoint_prep_timer_id() const { return adjoint_prep_timer_id_; } int adjoint_compute_timer_id() const { return adjoint_compute_timer_id_; } int jacobian_timer_id() const { return jacobian_timer_id_; } protected: void init_timer(const std::string name_) { base_timer_id_ = timer_.new_activity(name() + " | " + name_ + " | record"); adjoint_prep_timer_id_ = timer_.new_activity(name() + " | " + name_ + " | adjoint prep"); adjoint_compute_timer_id_ = timer_.new_activity(name() + " | " + name_ + " | adjoint compute"); jacobian_timer_id_ = timer_.new_activity(name() + " | " + name_ + " | Jacobian"); } protected: Timer& timer_; int nt_; // Number of timesteps to run Real c_; // Courant number int base_timer_id_; int adjoint_prep_timer_id_; int adjoint_compute_timer_id_; int jacobian_timer_id_; }; // ================= HAND CODED =========================== #include "advection_schemes_AD.h" class HandCodedDifferentiator : public Differentiator { public: HandCodedDifferentiator(Timer& timer, const std::string& name_) : Differentiator(timer) { init_timer(name_); } virtual bool supports_vector_calls() { return true; } virtual bool adjoint(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, const std::vector& y_AD, std::vector& x_AD) { if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) { timer_.start(adjoint_compute_timer_id_); lax_wendroff_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]); timer_.stop(); } else if (test_algorithm == TEST_ALGORITHM_TOON) { timer_.start(adjoint_compute_timer_id_); toon_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]); timer_.stop(); } else if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF_VECTOR) { timer_.start(adjoint_compute_timer_id_); lax_wendroff_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]); timer_.stop(); } else if (test_algorithm == TEST_ALGORITHM_TOON_VECTOR) { timer_.start(adjoint_compute_timer_id_); toon_AD(nt_, c_, &x[0], &y[0], &y_AD[0], &x_AD[0]); timer_.stop(); } else { std::cerr << "Algorithm not 
found: " << test_algorithm << "\n"; return false; } return true; } virtual bool jacobian(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, std::vector& jac, int force_jacobian = 0) { jac.resize(NX*NX); if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) { timer_.start(jacobian_timer_id_); lax_wendroff_K(nt_, c_, &x[0], &y[0], &jac[0]); timer_.stop(); } else if (test_algorithm == TEST_ALGORITHM_TOON) { timer_.start(jacobian_timer_id_); toon_K(nt_, c_, &x[0], &y[0], &jac[0]); timer_.stop(); } else if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF_VECTOR) { timer_.start(jacobian_timer_id_); lax_wendroff_K(nt_, c_, &x[0], &y[0], &jac[0]); timer_.stop(); } else if (test_algorithm == TEST_ALGORITHM_TOON_VECTOR) { timer_.start(jacobian_timer_id_); toon_K(nt_, c_, &x[0], &y[0], &jac[0]); timer_.stop(); } else { std::cerr << "Algorithm not found: " << test_algorithm << "\n"; return false; } return true; } virtual std::string name() const { return "Hand coded"; } }; // ================= ADEPT ================================ class AdeptDifferentiator : public Differentiator { public: AdeptDifferentiator(Timer& timer, const std::string& name_) : Differentiator(timer) { init_timer(name_); } virtual ~AdeptDifferentiator() { } virtual bool supports_vector_calls() { return true; } // Need to overload the function in the base class, because only // Adept supports the _VECTOR versions of the algorithms template void func(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y) { timer_.start(base_timer_id_); if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF) { lax_wendroff(nt_, c_, &x[0], &y[0]); } else if (test_algorithm == TEST_ALGORITHM_TOON) { toon(nt_, c_, &x[0], &y[0]); } else if (test_algorithm == TEST_ALGORITHM_LAX_WENDROFF_VECTOR) { lax_wendroff_vector(nt_, c_, &x[0], &y[0]); } else if (test_algorithm == TEST_ALGORITHM_TOON_VECTOR) { toon_vector(nt_, c_, &x[0], &y[0]); } timer_.stop(); } virtual bool adjoint(TestAlgorithm test_algorithm, const 
std::vector& x, std::vector& y, const std::vector& y_AD, std::vector& x_AD) { if (x.size() != NX || y_AD.size() != NX) { throw differentiator_exception("One of input vectors not of size NX in call to AdeptDifferentiator::adjoint"); } y.resize(NX); x_AD.resize(NX); std::vector q_init(NX); std::vector q(NX); adept::set_values(&q_init[0], NX, &x[0]); stack_.new_recording(); func(test_algorithm, q_init, q); timer_.start(adjoint_compute_timer_id_); adept::set_gradients(&q[0], NX, &y_AD[0]); stack_.compute_adjoint(); adept::get_gradients(&q_init[0], NX, &x_AD[0]); timer_.stop(); return true; } virtual bool jacobian(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, std::vector& jac, int force_jacobian = 0) { if (x.size() != NX) { throw differentiator_exception("Input vector x not of size NX in call to AdeptDifferentiator::jacobian"); } y.resize(NX); jac.resize(NX*NX); std::vector q_init(NX); std::vector q(NX); adept::set_values(&q_init[0], NX, &x[0]); stack_.new_recording(); func(test_algorithm, q_init, q); stack_.independent(&q_init[0], NX); stack_.dependent(&q[0], NX); timer_.start(jacobian_timer_id_); if (force_jacobian > 0) { stack_.jacobian_forward(&jac[0]); } else if (force_jacobian < 0) { stack_.jacobian_reverse(&jac[0]); } else { stack_.jacobian(&jac[0]); } timer_.stop(); return true; } virtual std::string name() const { std::stringstream name_; name_ << "Adept"; int nthread = stack_.max_jacobian_threads(); if (nthread > 1) { name_ << " (Jacobian using up to " << nthread << " OpenMP threads)"; } else { name_ << " (single threaded)"; } return name_.str(); } virtual void no_openmp() { stack_.set_max_jacobian_threads(1); } virtual void print() { std::cout << "========== ADEPT STACK BEGIN ==========\n"; std::cout << stack_; std::cout << "========== ADEPT STACK END ============\n"; } private: adept::Stack stack_; }; #ifdef HAVE_ADOLC // ================= ADOLC ================================ class AdolcDifferentiator : public Differentiator { public: 
AdolcDifferentiator(Timer& timer, const std::string& name_) : Differentiator(timer), jac(0), I(0), result(0) { init_timer(name_); } // Note that ADOL-C places the "adouble" type in the global namespace typedef adouble aReal; virtual ~AdolcDifferentiator() { if (I) { myfreeI2(NX, I); } if (jac) { myfree2(jac); } if (result) { myfree1(result); } } virtual bool adjoint(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, const std::vector& y_AD, std::vector& x_AD) { if (x.size() != NX || y_AD.size() != NX) { throw differentiator_exception("One of input vectors not of size NX in call to AdolcDifferentiator::adjoint"); } y.resize(NX); x_AD.resize(NX); std::vector q_init(NX); std::vector q(NX); trace_on(1,1); for (int i = 0; i < NX; i++) { q_init[i] <<= x[i]; } func(test_algorithm, q_init, q); for (int i = 0; i < NX; i++) { q[i] >>= y[i]; } trace_off(); timer_.start(adjoint_compute_timer_id_); reverse(1, NX, NX, 0, const_cast(&y_AD[0]), &x_AD[0]); timer_.stop(); return true; } virtual bool jacobian(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, std::vector& jac_, int force_jacobian = 0) { if (x.size() != NX) { throw differentiator_exception("Input vector x not of size NX in call to AdolcDifferentiator::jacobian"); } y.resize(NX); jac_.resize(NX*NX); std::vector q_init(NX); std::vector q(NX); trace_on(1,1); for (int i = 0; i < NX; i++) { q_init[i] <<= x[i]; } func(test_algorithm, q_init, q); for (int i = 0; i < NX; i++) { q[i] >>= y[i]; } trace_off(); if (!jac) { jac = myalloc2(NX,NX); I = myallocI2(NX); result = myalloc1(NX); } timer_.start(jacobian_timer_id_); if (force_jacobian < 0) { int rc = zos_forward(1, NX, NX, 1, &x[0], result); if (rc < 0) { throw differentiator_exception("Error occurred ADOL-C's zos_forward()"); } MINDEC(rc,fov_reverse(1, NX, NX, NX, I, jac)); } else if (force_jacobian > 0) { int rc = fov_forward(1, NX, NX, NX, &x[0], I, result, jac); if (rc < 0) { throw differentiator_exception("Error occurred ADOL-C's 
fov_forward()"); } } else { ::jacobian(1, NX, NX, &x[0], jac); } timer_.stop(); for (int j=0, index=0; j < NX; j++) { for (int i=0; i < NX; i++, index++) { jac_[index] = jac[i][j]; } } return true; } virtual std::string name() const { return "ADOL-C"; } private: Real** jac; Real** I; Real* result; }; #endif // HAVE_ADOLC #ifdef HAVE_CPPAD // ================= CPPAD ================================ class CppadDifferentiator : public Differentiator { public: typedef CppAD::AD aReal; CppadDifferentiator(Timer& timer, const std::string& name_) : Differentiator(timer) { init_timer(name_); CppAD::thread_alloc::hold_memory(true); } virtual ~CppadDifferentiator() { } virtual bool adjoint(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, const std::vector& y_AD, std::vector& x_AD) { if (x.size() != NX || y_AD.size() != NX) { throw differentiator_exception("One of input vectors not of size NX in call to CppadDifferentiator::adjoint"); } y.resize(NX); x_AD.resize(NX); std::vector q_init(NX); std::vector q(NX); for (int i = 0; i < NX; i++) { q_init[i] = x[i]; } CppAD::Independent(q_init); func(test_algorithm, q_init, q); for (int i = 0; i < NX; i++) { y[i] = CppAD::Value(q[i]); } timer_.start(adjoint_prep_timer_id_); CppAD::ADFun f(q_init, q); timer_.start(adjoint_compute_timer_id_); x_AD = f.Reverse(1, y_AD); timer_.stop(); return true; } virtual bool jacobian(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, std::vector& jac, int force_jacobian = 0) { if (x.size() != NX) { throw differentiator_exception("Input vector x not of size NX in call to CppadDifferentiator::jacobian"); } y.resize(NX); jac.resize(NX*NX); jac_transpose_.resize(NX*NX); std::vector q_init(NX); std::vector q(NX); for (int i = 0; i < NX; i++) { q_init[i] = x[i]; } CppAD::Independent(q_init); func(test_algorithm, q_init, q); for (int i = 0; i < NX; i++) { y[i] = CppAD::Value(q[i]); } timer_.start(adjoint_prep_timer_id_); CppAD::ADFun f(q_init, q); 
timer_.start(jacobian_timer_id_); if (force_jacobian < 0) { CppAD::JacobianRev(f, x, jac_transpose_); } else if (force_jacobian > 0) { CppAD::JacobianFor(f, x, jac_transpose_); } else { jac_transpose_ = f.Jacobian(x); } // Transpose Jacobian because CppAD uses the opposite convention to the other tools Real (&jac_transpose2)[NX][NX] = *reinterpret_cast(&jac_transpose_[0]); for (int i = 0, index = 0; i < NX; i++) { for (int j = 0; j < NX; j++, index++) { jac[index] = jac_transpose2[j][i]; } } return true; } virtual std::string name() const { return "CppAD"; } private: std::vector jac_transpose_; }; #endif // HAVE_CPPAD #ifdef HAVE_SACADO // ================= SACADO ================================ template<> int Sacado::Rad::ADmemblock::n_blocks = 0; class SacadoDifferentiator : public Differentiator { public: typedef Sacado::Rad::ADvar aReal; typedef Sacado::ELRFad::DFad aReal_fad; SacadoDifferentiator(Timer& timer, const std::string& name_) : Differentiator(timer) { init_timer(name_); } virtual ~SacadoDifferentiator() { } virtual bool adjoint(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, const std::vector& y_AD, std::vector& x_AD) { if (x.size() != NX || y_AD.size() != NX) { throw differentiator_exception("One of input vectors not of size NX in call to SacadoDifferentiator::adjoint"); } y.resize(NX); x_AD.resize(NX); std::vector q_init(NX); std::vector q(NX); for (int i = 0; i < NX; i++) { q_init[i] = x[i]; } func(test_algorithm, q_init, q); for (int i = 0; i < NX; i++) { y[i] = q[i].val(); } timer_.start(base_timer_id_); aReal objective_func = 0.0; for (int i = 0; i < NX; i++) { objective_func += q[i] * y_AD[i]; } timer_.start(adjoint_compute_timer_id_); Sacado::Rad::ADvar::Gradcomp(); for (int i = 0; i < NX; i++) { x_AD[i] = q_init[i].adj(); } timer_.stop(); return true; } virtual bool jacobian(TestAlgorithm test_algorithm, const std::vector& x, std::vector& y, std::vector& jac, int force_jacobian = 0) { if (x.size() != NX) { throw 
differentiator_exception("Input vector x not of size NX in call to SacadoDifferentiator::jacobian"); } y.resize(NX); jac.resize(NX*NX); std::vector q_init(NX); std::vector q(NX); for (int i = 0; i < NX; i++) { q_init[i] = x[i]; q_init[i].resize(NX); q[i].resize(NX); q_init[i].fastAccessDx(i) = 1.0; } func(test_algorithm, q_init, q); for (int i = 0; i < NX; i++) { y[i] = q[i].val(); } int index = 0; for (int i = 0; i < NX; i++) { for (int k = 0; k < NX; k++, index++) { jac[index] = q[k].dx(i); } } return true; } virtual std::string name() const { return "Sacado (::Rad for adjoint, forward-mode only ::ELRFad for Jacobian)"; } }; #endif // HAVE_SACADO // The following enum is designed to be used in a "for" loop to loop // through the available automatic differentiaion tools enum AutoDiffTool { AUTODIFF_TOOL_ADEPT = 0 #ifdef HAVE_ADOLC , AUTODIFF_TOOL_ADOLC #endif #ifdef HAVE_CPPAD , AUTODIFF_TOOL_CPPAD #endif #ifdef HAVE_SACADO , AUTODIFF_TOOL_SACADO #endif , N_AUTODIFF_TOOLS }; const char* autodiff_tool_string[] = { "adept" #ifdef HAVE_ADOLC , "adolc" #endif #ifdef HAVE_CPPAD , "cppad" #endif #ifdef HAVE_SACADO , "sacado" #endif }; const char* autodiff_tool_long_string[] = { "Adept" #ifdef HAVE_ADOLC , "ADOL-C" #endif #ifdef HAVE_CPPAD , "CppAD" #endif #ifdef HAVE_SACADO , "Sacado" #endif }; inline std::string autodiff_tools() { std::string tools = autodiff_tool_string[0]; for (int i = 1; i < N_AUTODIFF_TOOLS; i++) { tools += ","; tools += autodiff_tool_string[i]; } return tools; } // Return pointer to a virtual base object Differentiator inline Differentiator* new_differentiator(AutoDiffTool auto_diff_tool, Timer& timer, const std::string& name_) { if (auto_diff_tool == AUTODIFF_TOOL_ADEPT) { return new AdeptDifferentiator(timer, name_); } #ifdef HAVE_ADOLC else if (auto_diff_tool == AUTODIFF_TOOL_ADOLC) { return new AdolcDifferentiator(timer, name_); } #endif #ifdef HAVE_CPPAD else if (auto_diff_tool == AUTODIFF_TOOL_CPPAD) { return new CppadDifferentiator(timer, 
name_); } #endif #ifdef HAVE_SACADO else if (auto_diff_tool == AUTODIFF_TOOL_SACADO) { return new SacadoDifferentiator(timer, name_); } #endif else { return 0; } } ================================================ FILE: benchmark/math_benchmark.cpp ================================================ /* math_benchmark.cpp - Benchmark mathematical functions Copyright (C) 2023 ECMWF Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #include #include #include "Timer.h" int main(int argc, const char** argv) { using namespace adept; static const int N = 1024; int nrepeat = 1024*16; Vector x(N), y(N); Timer timer; timer.print_on_exit(true); int add_id = timer.new_activity("addition"); int sub_id = timer.new_activity("subtraction"); int mul_id = timer.new_activity("multiplication"); int div_id = timer.new_activity("division"); int exp_id = timer.new_activity("exp"); int fastexp_id = timer.new_activity("fastexp"); int log_id = timer.new_activity("log"); int sin_id = timer.new_activity("sin"); x = 1.001; y = x*x; y = 0.0; timer.start(add_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y += x; } timer.stop(); y = 0.0; timer.start(sub_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y -= x; } timer.stop(); y = 1.0; timer.start(mul_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y *= x; } timer.stop(); std::cout << "y=" << y(0) << "\n"; timer.start(div_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y /= x; } timer.stop(); x = 0.001; timer.start(exp_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y = exp(x); x = y-1.001; } timer.stop(); std::cout << "y=" << y(0) << "\n"; x = 0.001; timer.start(fastexp_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y = fastexp(x); x = y-1.001; } timer.stop(); std::cout << "y=" << y(0) << "\n"; x = 1.001; 
timer.start(log_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y = log(x); x = y+1.0; } timer.stop(); std::cout << "y=" << y(0) << "\n"; x = 1.001; timer.start(sin_id); for (int irepeat = 0; irepeat < nrepeat; ++irepeat) { y = sin(x); y = x; } timer.stop(); std::cout << "y=" << y(0) << "\n"; std::cout << "RELATIVE COSTS\n"; std::cout << "div/mul = " << timer.timing(div_id)/timer.timing(mul_id) << "\n"; std::cout << "exp/mul = " << timer.timing(exp_id)/timer.timing(mul_id) << "\n"; std::cout << "fastexp/mul = " << timer.timing(fastexp_id)/timer.timing(mul_id) << "\n"; std::cout << "log/mul = " << timer.timing(log_id)/timer.timing(mul_id) << "\n"; std::cout << "sin/mul = " << timer.timing(sin_id)/timer.timing(mul_id) << "\n"; } ================================================ FILE: benchmark/matrix_benchmark.cpp ================================================ #include #include #include "Timer.h" template double time_matmul(int n, int nrepeat, bool is_col_major) { adept::Array<2,double,IsActive> A, B, C; Timer timer; int matmul_timer_id = timer.new_activity("matmul"); if (is_col_major) { A.resize_column_major(adept::expression_size(n,n)); B.resize_column_major(adept::expression_size(n,n)); C.resize_column_major(adept::expression_size(n,n)); } else { A.resize(n,n); B.resize(n,n); C.resize(n,n); } for (int irepeat = -nrepeat/10; irepeat < nrepeat; ++irepeat) { A = 1.1; B = 2.2; A.diag_vector() = 3.3; B.diag_vector() = 5.5; if (IsActive) { adept::active_stack()->new_recording(); } if (irepeat >= 0) { timer.start(matmul_timer_id); } C = A ** B; if (irepeat >= 0) { timer.stop(); } } /* if (IsActive && n < 8) { std::cout << "C=" << C; std::cout << *adept::active_stack(); adept::active_stack()->print_statements(); } */ return timer.timing(matmul_timer_id) / nrepeat; } double time_solve(int n, int nrepeat, bool is_col_major) { adept::Matrix A, B, C; Timer timer; int solve_timer_id = timer.new_activity("solve"); if (is_col_major) { 
A.resize_column_major(adept::expression_size(n,n)); B.resize_column_major(adept::expression_size(n,n)); C.resize_column_major(adept::expression_size(n,n)); } else { A.resize(n,n); B.resize(n,n); C.resize(n,n); } for (int irepeat = -nrepeat/10; irepeat < nrepeat; ++irepeat) { A = 1.1; B = 2.2; A.diag_vector() = 3.3; B.diag_vector() = 5.5; if (irepeat >= 0) { timer.start(solve_timer_id); } C = adept::solve(A, B); if (irepeat >= 0) { timer.stop(); } } return timer.timing(solve_timer_id) / nrepeat; } int main(int argc, char* argv[]) { int ibegin = 1; int iend = 8; int nrepeat = 20; bool is_col_major = false; adept::Stack stack; int n = 2; std::cout << "Average cost per operation (" << nrepeat << " repeats)\n"; std::cout << "Dense N-by-N matrix-matrix multiplication\n"; //std::cout << " N inactive time (us) inactive flops active time (us) active flops\n"; std::cout << "N \tinactive time (us) \tactive time (us)\n"; for (int i = ibegin; i <= iend; ++i) { std::cout << n << " \t"; double t = time_matmul(n, nrepeat, is_col_major); // std::cout << t*1.0e6 << " " << (n*n*n) / t << " "; std::cout << t*1.0e6/nrepeat << " \t\t\t"; t = time_matmul(n, nrepeat, is_col_major); // std::cout << t*1.0e6 << " " << (n*n*n) / t; std::cout << t*1.0e6/nrepeat; std::cout << "\n"; n *= 2; } n = 2; std::cout << "Dense N-by-N matrix-matrix solve\n"; std::cout << "N \tinactive time (us)\n"; for (int i = ibegin; i <= iend; ++i) { std::cout << n << " \t"; double t = time_solve(n, nrepeat, is_col_major); // std::cout << t*1.0e6 << " " << (n*n*n) / t << " "; std::cout << t*1.0e6/nrepeat << "\n"; n *= 2; } return 0; } ================================================ FILE: benchmark/nx.h ================================================ #ifndef NX #define NX 100 #endif ================================================ FILE: config_platform_independent.h.in ================================================ /* config_platform_independent.h.in. 
*/ /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the home page for this package. */ #undef PACKAGE_URL /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Version number of package */ #undef VERSION ================================================ FILE: configure.ac ================================================ # Configure autoconf for the Adept library ### GENERAL CONFIGURATION ### AC_PREREQ([2.61]) AC_INIT([adept], [2.1.3], [r.j.hogan@ecmwf.int], [adept], [http://www.met.reading.ac.uk/clouds/adept/]) AC_LANG([C++]) AC_CONFIG_SRCDIR([adept/Stack.cpp]) AC_CONFIG_HEADERS([config.h config_platform_independent.h]) AM_INIT_AUTOMAKE([foreign -Wall -Werror]) AC_CONFIG_MACRO_DIR([m4]) # Checks for programs AC_PROG_CXX AC_PROG_F77 AC_PROG_MAKE_SET m4_ifdef([AM_PROG_AR],[AM_PROG_AR]) AC_PROG_LIBTOOL # Check for system features AC_CHECK_HEADERS([sys/time.h]) AC_CHECK_FUNCS([gettimeofday pow sqrt]) # Check for OpenMP AC_OPENMP AC_SUBST(AM_CXXFLAGS,"$OPENMP_CXXFLAGS") #### LIBRARIES NEEDED BY ADEPT ### if test "x$F77" = x then AC_MSG_NOTICE([Not checking for BLAS and LAPACK because no Fortran compiler found]) else # Check for BLAS and LAPACK # First we need this since the libraries are Fortran called from C++ AC_F77_LIBRARY_LDFLAGS # The following tests for both BLAS and LAPACK AX_LAPACK fi # Dependencies dictate the following order of libraries LIBS="$LAPACK_LIBS $BLAS_LIBS $LIBS" # FLIBS should be included in LDADD or LIBADD in the relevant # Makefile.am # If the BLAS library is OpenBLAS then we need to give the user the # option to change the number of threads, since OpenBLAS's pthreads # can clash with Adept's use of 
# OpenMP, leading to suboptimal
# performance.
# The probe below links a call to openblas_set_num_threads() after
# including cblas.h; it only succeeds when the cblas.h found on the
# include path declares that function, in which case
# HAVE_OPENBLAS_CBLAS_HEADER is defined.
ac_have_openblas_cblas_header=no
if test "$ax_blas_ok" = yes
then
  if test "x$BLAS_LIBS" = "x-lopenblas"
  then
    AC_MSG_CHECKING([whether cblas.h is from OpenBLAS])
    AC_TRY_LINK([#include <cblas.h>],
    [openblas_set_num_threads(1)],
    [ac_have_openblas_cblas_header=yes
     AC_MSG_RESULT(yes)
     AC_DEFINE([HAVE_OPENBLAS_CBLAS_HEADER],1,[Is the cblas.h header file from OpenBLAS?])],
    AC_MSG_RESULT(no))
  fi
fi

### LIBRARIES THAT MAY BE USED BY TEST PROGRAMS ###

# Checks for GNU Scientific Library
AC_CHECK_LIB([gsl],[gsl_multimin_fdfminimizer_alloc],[AC_MSG_NOTICE([Note that GSL is not used by Adept, just by one of the test programs])])
AC_SUBST(USE_GSL, ["$ac_cv_lib_gsl_gsl_multimin_fdfminimizer_alloc"])

# Check for ADOL-C automatic differentiation library
AC_CHECK_HEADERS([adolc/adolc.h])
AC_CHECK_LIB([adolc],[tapestats])

# Check for SACADO automatic differentiation library
# LIBS is extended with the Sacado libraries for the link test and
# restored from save_LIBS only if the test fails
ac_have_sacado=no
save_LIBS=$LIBS
LIBS="$LIBS -lsacado -lteuchos"
AC_MSG_CHECKING([whether Sacado is installed])
AC_TRY_LINK([#include <Sacado.hpp>],
[Sacado::ELRFad::DFad<double> v = 1.0],
[ac_have_sacado=yes
 AC_MSG_RESULT(yes)
 AC_DEFINE([HAVE_SACADO],1,[Is the Sacado library working?])],
[LIBS=$save_LIBS
 AC_MSG_RESULT(no)])

# Check for CppAD automatic differentiation library
AC_CHECK_HEADERS([cppad/cppad.hpp])
if test "$ac_cv_header_cppad_cppad_hpp" = yes
then
  AC_DEFINE([NDEBUG],1,[If CppAD is being used by the benchmarking program then it is much faster with debugging disabled])
fi

### CREATE MAKEFILES AND CONFIG HEADER ###

AC_CONFIG_FILES([Makefile makefile_include adept/Makefile include/Makefile benchmark/Makefile])

AC_DEFINE_UNQUOTED([CXX],["$CXX"],[C++ compiler])
AC_DEFINE_UNQUOTED([CXXFLAGS],["$CXXFLAGS"],[Flags passed to C++ compiler])
AC_DEFINE_UNQUOTED([BLAS_LIBS],["$BLAS_LIBS"],[BLAS library option])

AH_BOTTOM([/* Use ADOLC only if both the library and the header files are available */
#if defined( HAVE_LIBADOLC ) && defined( HAVE_ADOLC_ADOLC_H )
#define HAVE_ADOLC 1
#endif])
AH_BOTTOM([/* Use CPPAD if the header files are available */ #if defined( HAVE_CPPAD_CPPAD_HPP ) #define HAVE_CPPAD 1 #endif]) AC_OUTPUT ### REPORT CONFIGURATION TO THE USER ### AC_MSG_NOTICE([********************* Summary **************************************]) AC_MSG_NOTICE([ CXX = $CXX ]) AC_MSG_NOTICE([ CPPFLAGS = $CPPFLAGS]) AC_MSG_NOTICE([ CXXFLAGS = $CXXFLAGS $OPENMP_CXXFLAGS]) AC_MSG_NOTICE([ LDFLAGS = $LDFLAGS]) AC_MSG_NOTICE([ LIBS = $LIBS]) AC_MSG_NOTICE([Typing "make; make install" will install Adept header files in $includedir]) AC_MSG_NOTICE([and the static and shared libraries as $libdir/libadept.*, where]) AC_MSG_NOTICE([prefix=$prefix]) AC_MSG_NOTICE([********************* Libraries used by Adept **********************]) ac_warn_given=no if test "$ax_blas_ok" = yes then AC_MSG_NOTICE([BLAS (Basic Linear Algebra Subprograms) will be used: BLAS_LIBS = $BLAS_LIBS]) if test "$ac_have_openblas_cblas_header" = yes then AC_MSG_NOTICE([ Number of BLAS threads may be controlled at run time]) fi else AC_MSG_NOTICE([BLAS (Basic Linear Algebra Subprograms) will not be used: MATRIX MULTIPLICATION IS UNAVAILABLE]) ac_warn_given=yes fi if test "$ax_lapack_ok" = yes then AC_MSG_NOTICE([LAPACK (Linear Algebra Package) will be used: LAPACK_LIBS = $LAPACK_LIBS]) else AC_MSG_NOTICE([LAPACK (Linear Algebra Package) will not be used: LINEAR ALGEBRA ROUTINES ARE UNAVAILABLE]) ac_warn_given=yes fi AC_MSG_NOTICE([********************* Libraries used by test programs **************]) if test "$ac_cv_lib_gsl_gsl_multimin_fdfminimizer_alloc" = no then AC_MSG_NOTICE([GNU Scientific Library (GSL) not found; Adept will compile all the]) AC_MSG_NOTICE([example programs except test/test_gsl_interface.]) ac_warn_given=yes else AC_MSG_NOTICE([GNU Scientific Library (GSL) found; Adept will compile all the]) AC_MSG_NOTICE([example programs.]) fi AC_MSG_NOTICE([********************* Benchmark program ****************************]) AC_MSG_NOTICE([The benchmarking program, 
"benchmark/advection_benchmark", will be]) AC_MSG_NOTICE([compiled with support for these automatic differentiation libraries:]) AC_MSG_NOTICE([ Adept: yes]) if test "$ac_cv_lib_adolc_tapestats" = yes -a "$ac_cv_header_adolc_adolc_h" = yes then AC_MSG_NOTICE([ ADOLC: yes]) else AC_MSG_NOTICE([ ADOLC: no]) ac_warn_given=yes fi if test "$ac_cv_header_cppad_cppad_hpp" = yes then AC_MSG_NOTICE([ CppAD: yes]) else AC_MSG_NOTICE([ CppAD: no]) ac_warn_given=yes fi if test "$ac_have_sacado" = no then AC_MSG_NOTICE([ Sacado: no]) ac_warn_given=yes else AC_MSG_NOTICE([ Sacado: yes]) fi AC_MSG_NOTICE([********************* Top tips *************************************]) AC_MSG_NOTICE([To use a higher than default optimization level, call this configure]) AC_MSG_NOTICE([script with something like: ./configure "CXXFLAGS=-g -O3"]) AC_MSG_NOTICE([If you have libraries in non-standard locations, specify their location]) AC_MSG_NOTICE([by calling this script with something like:]) AC_MSG_NOTICE([ ./configure CPPFLAGS=-I/local/include LDFLAGS="-L/local/lib -Wl,-rpath,/local/lib"]) AC_MSG_NOTICE([The rpath argument is especially useful for locating the BLAS and LAPACK]) AC_MSG_NOTICE([libraries if they are in non-standard locations, so that executables]) AC_MSG_NOTICE([built with Adept do not need to use the LD_LIBRARY_PATH environment]) AC_MSG_NOTICE([variable to specify their locations at run-time.]) AC_MSG_NOTICE([********************************************************************]) ================================================ FILE: doc/COPYING ================================================ GNU Free Documentation License Version 1.3, 3 November 2008 Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 0. 
PREAMBLE The purpose of this License is to make a manual, textbook, or other functional and useful document "free" in the sense of freedom: to assure everyone the effective freedom to copy and redistribute it, with or without modifying it, either commercially or noncommercially. Secondarily, this License preserves for the author and publisher a way to get credit for their work, while not being considered responsible for modifications made by others. This License is a kind of "copyleft", which means that derivative works of the document must themselves be free in the same sense. It complements the GNU General Public License, which is a copyleft license designed for free software. We have designed this License in order to use it for manuals for free software, because free software needs free documentation: a free program should come with manuals providing the same freedoms that the software does. But this License is not limited to software manuals; it can be used for any textual work, regardless of subject matter or whether it is published as a printed book. We recommend this License principally for works whose purpose is instruction or reference. 1. APPLICABILITY AND DEFINITIONS This License applies to any manual or other work, in any medium, that contains a notice placed by the copyright holder saying it can be distributed under the terms of this License. Such a notice grants a world-wide, royalty-free license, unlimited in duration, to use that work under the conditions stated herein. The "Document", below, refers to any such manual or work. Any member of the public is a licensee, and is addressed as "you". You accept the license if you copy, modify or distribute the work in a way requiring permission under copyright law. A "Modified Version" of the Document means any work containing the Document or a portion of it, either copied verbatim, or with modifications and/or translated into another language. 
A "Secondary Section" is a named appendix or a front-matter section of the Document that deals exclusively with the relationship of the publishers or authors of the Document to the Document's overall subject (or to related matters) and contains nothing that could fall directly within that overall subject. (Thus, if the Document is in part a textbook of mathematics, a Secondary Section may not explain any mathematics.) The relationship could be a matter of historical connection with the subject or with related matters, or of legal, commercial, philosophical, ethical or political position regarding them. The "Invariant Sections" are certain Secondary Sections whose titles are designated, as being those of Invariant Sections, in the notice that says that the Document is released under this License. If a section does not fit the above definition of Secondary then it is not allowed to be designated as Invariant. The Document may contain zero Invariant Sections. If the Document does not identify any Invariant Sections then there are none. The "Cover Texts" are certain short passages of text that are listed, as Front-Cover Texts or Back-Cover Texts, in the notice that says that the Document is released under this License. A Front-Cover Text may be at most 5 words, and a Back-Cover Text may be at most 25 words. A "Transparent" copy of the Document means a machine-readable copy, represented in a format whose specification is available to the general public, that is suitable for revising the document straightforwardly with generic text editors or (for images composed of pixels) generic paint programs or (for drawings) some widely available drawing editor, and that is suitable for input to text formatters or for automatic translation to a variety of formats suitable for input to text formatters. 
A copy made in an otherwise Transparent file format whose markup, or absence of markup, has been arranged to thwart or discourage subsequent modification by readers is not Transparent. An image format is not Transparent if used for any substantial amount of text. A copy that is not "Transparent" is called "Opaque". Examples of suitable formats for Transparent copies include plain ASCII without markup, Texinfo input format, LaTeX input format, SGML or XML using a publicly available DTD, and standard-conforming simple HTML, PostScript or PDF designed for human modification. Examples of transparent image formats include PNG, XCF and JPG. Opaque formats include proprietary formats that can be read and edited only by proprietary word processors, SGML or XML for which the DTD and/or processing tools are not generally available, and the machine-generated HTML, PostScript or PDF produced by some word processors for output purposes only. The "Title Page" means, for a printed book, the title page itself, plus such following pages as are needed to hold, legibly, the material this License requires to appear in the title page. For works in formats which do not have any title page as such, "Title Page" means the text near the most prominent appearance of the work's title, preceding the beginning of the body of the text. The "publisher" means any person or entity that distributes copies of the Document to the public. A section "Entitled XYZ" means a named subunit of the Document whose title either is precisely XYZ or contains XYZ in parentheses following text that translates XYZ in another language. (Here XYZ stands for a specific section name mentioned below, such as "Acknowledgements", "Dedications", "Endorsements", or "History".) To "Preserve the Title" of such a section when you modify the Document means that it remains a section "Entitled XYZ" according to this definition. 
The Document may include Warranty Disclaimers next to the notice which states that this License applies to the Document. These Warranty Disclaimers are considered to be included by reference in this License, but only as regards disclaiming warranties: any other implication that these Warranty Disclaimers may have is void and has no effect on the meaning of this License. 2. VERBATIM COPYING You may copy and distribute the Document in any medium, either commercially or noncommercially, provided that this License, the copyright notices, and the license notice saying this License applies to the Document are reproduced in all copies, and that you add no other conditions whatsoever to those of this License. You may not use technical measures to obstruct or control the reading or further copying of the copies you make or distribute. However, you may accept compensation in exchange for copies. If you distribute a large enough number of copies you must also follow the conditions in section 3. You may also lend copies, under the same conditions stated above, and you may publicly display copies. 3. COPYING IN QUANTITY If you publish printed copies (or copies in media that commonly have printed covers) of the Document, numbering more than 100, and the Document's license notice requires Cover Texts, you must enclose the copies in covers that carry, clearly and legibly, all these Cover Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on the back cover. Both covers must also clearly and legibly identify you as the publisher of these copies. The front cover must present the full title with all words of the title equally prominent and visible. You may add other material on the covers in addition. Copying with changes limited to the covers, as long as they preserve the title of the Document and satisfy these conditions, can be treated as verbatim copying in other respects. 
If the required texts for either cover are too voluminous to fit legibly, you should put the first ones listed (as many as fit reasonably) on the actual cover, and continue the rest onto adjacent pages. If you publish or distribute Opaque copies of the Document numbering more than 100, you must either include a machine-readable Transparent copy along with each Opaque copy, or state in or with each Opaque copy a computer-network location from which the general network-using public has access to download using public-standard network protocols a complete Transparent copy of the Document, free of added material. If you use the latter option, you must take reasonably prudent steps, when you begin distribution of Opaque copies in quantity, to ensure that this Transparent copy will remain thus accessible at the stated location until at least one year after the last time you distribute an Opaque copy (directly or through your agents or retailers) of that edition to the public. It is requested, but not required, that you contact the authors of the Document well before redistributing any large number of copies, to give them a chance to provide you with an updated version of the Document. 4. MODIFICATIONS You may copy and distribute a Modified Version of the Document under the conditions of sections 2 and 3 above, provided that you release the Modified Version under precisely this License, with the Modified Version filling the role of the Document, thus licensing distribution and modification of the Modified Version to whoever possesses a copy of it. In addition, you must do these things in the Modified Version: A. Use in the Title Page (and on the covers, if any) a title distinct from that of the Document, and from those of previous versions (which should, if there were any, be listed in the History section of the Document). You may use the same title as a previous version if the original publisher of that version gives permission. B. 
List on the Title Page, as authors, one or more persons or entities responsible for authorship of the modifications in the Modified Version, together with at least five of the principal authors of the Document (all of its principal authors, if it has fewer than five), unless they release you from this requirement. C. State on the Title page the name of the publisher of the Modified Version, as the publisher. D. Preserve all the copyright notices of the Document. E. Add an appropriate copyright notice for your modifications adjacent to the other copyright notices. F. Include, immediately after the copyright notices, a license notice giving the public permission to use the Modified Version under the terms of this License, in the form shown in the Addendum below. G. Preserve in that license notice the full lists of Invariant Sections and required Cover Texts given in the Document's license notice. H. Include an unaltered copy of this License. I. Preserve the section Entitled "History", Preserve its Title, and add to it an item stating at least the title, year, new authors, and publisher of the Modified Version as given on the Title Page. If there is no section Entitled "History" in the Document, create one stating the title, year, authors, and publisher of the Document as given on its Title Page, then add an item describing the Modified Version as stated in the previous sentence. J. Preserve the network location, if any, given in the Document for public access to a Transparent copy of the Document, and likewise the network locations given in the Document for previous versions it was based on. These may be placed in the "History" section. You may omit a network location for a work that was published at least four years before the Document itself, or if the original publisher of the version it refers to gives permission. K. 
For any section Entitled "Acknowledgements" or "Dedications", Preserve the Title of the section, and preserve in the section all the substance and tone of each of the contributor acknowledgements and/or dedications given therein. L. Preserve all the Invariant Sections of the Document, unaltered in their text and in their titles. Section numbers or the equivalent are not considered part of the section titles. M. Delete any section Entitled "Endorsements". Such a section may not be included in the Modified Version. N. Do not retitle any existing section to be Entitled "Endorsements" or to conflict in title with any Invariant Section. O. Preserve any Warranty Disclaimers. If the Modified Version includes new front-matter sections or appendices that qualify as Secondary Sections and contain no material copied from the Document, you may at your option designate some or all of these sections as invariant. To do this, add their titles to the list of Invariant Sections in the Modified Version's license notice. These titles must be distinct from any other section titles. You may add a section Entitled "Endorsements", provided it contains nothing but endorsements of your Modified Version by various parties--for example, statements of peer review or that the text has been approved by an organization as the authoritative definition of a standard. You may add a passage of up to five words as a Front-Cover Text, and a passage of up to 25 words as a Back-Cover Text, to the end of the list of Cover Texts in the Modified Version. Only one passage of Front-Cover Text and one of Back-Cover Text may be added by (or through arrangements made by) any one entity. If the Document already includes a cover text for the same cover, previously added by you or by arrangement made by the same entity you are acting on behalf of, you may not add another; but you may replace the old one, on explicit permission from the previous publisher that added the old one. 
The author(s) and publisher(s) of the Document do not by this License give permission to use their names for publicity for or to assert or imply endorsement of any Modified Version. 5. COMBINING DOCUMENTS You may combine the Document with other documents released under this License, under the terms defined in section 4 above for modified versions, provided that you include in the combination all of the Invariant Sections of all of the original documents, unmodified, and list them all as Invariant Sections of your combined work in its license notice, and that you preserve all their Warranty Disclaimers. The combined work need only contain one copy of this License, and multiple identical Invariant Sections may be replaced with a single copy. If there are multiple Invariant Sections with the same name but different contents, make the title of each such section unique by adding at the end of it, in parentheses, the name of the original author or publisher of that section if known, or else a unique number. Make the same adjustment to the section titles in the list of Invariant Sections in the license notice of the combined work. In the combination, you must combine any sections Entitled "History" in the various original documents, forming one section Entitled "History"; likewise combine any sections Entitled "Acknowledgements", and any sections Entitled "Dedications". You must delete all sections Entitled "Endorsements". 6. COLLECTIONS OF DOCUMENTS You may make a collection consisting of the Document and other documents released under this License, and replace the individual copies of this License in the various documents with a single copy that is included in the collection, provided that you follow the rules of this License for verbatim copying of each of the documents in all other respects. 
You may extract a single document from such a collection, and distribute it individually under this License, provided you insert a copy of this License into the extracted document, and follow this License in all other respects regarding verbatim copying of that document. 7. AGGREGATION WITH INDEPENDENT WORKS A compilation of the Document or its derivatives with other separate and independent documents or works, in or on a volume of a storage or distribution medium, is called an "aggregate" if the copyright resulting from the compilation is not used to limit the legal rights of the compilation's users beyond what the individual works permit. When the Document is included in an aggregate, this License does not apply to the other works in the aggregate which are not themselves derivative works of the Document. If the Cover Text requirement of section 3 is applicable to these copies of the Document, then if the Document is less than one half of the entire aggregate, the Document's Cover Texts may be placed on covers that bracket the Document within the aggregate, or the electronic equivalent of covers if the Document is in electronic form. Otherwise they must appear on printed covers that bracket the whole aggregate. 8. TRANSLATION Translation is considered a kind of modification, so you may distribute translations of the Document under the terms of section 4. Replacing Invariant Sections with translations requires special permission from their copyright holders, but you may include translations of some or all Invariant Sections in addition to the original versions of these Invariant Sections. You may include a translation of this License, and all the license notices in the Document, and any Warranty Disclaimers, provided that you also include the original English version of this License and the original versions of those notices and disclaimers. 
In case of a disagreement between the translation and the original version of this License or a notice or disclaimer, the original version will prevail. If a section in the Document is Entitled "Acknowledgements", "Dedications", or "History", the requirement (section 4) to Preserve its Title (section 1) will typically require changing the actual title. 9. TERMINATION You may not copy, modify, sublicense, or distribute the Document except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, or distribute it is void, and will automatically terminate your rights under this License. However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, receipt of a copy of some or all of the same material does not give you any rights to use it. 10. FUTURE REVISIONS OF THIS LICENSE The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See http://www.gnu.org/copyleft/. 
Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License "or any later version" applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation. If the Document specifies that a proxy can decide which future versions of this License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Document. 11. RELICENSING "Massive Multiauthor Collaboration Site" (or "MMC Site") means any World Wide Web server that publishes copyrightable works and also provides prominent facilities for anybody to edit those works. A public wiki that anybody can edit is an example of such a server. A "Massive Multiauthor Collaboration" (or "MMC") contained in the site means any set of copyrightable works thus published on the MMC site. "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 license published by Creative Commons Corporation, a not-for-profit corporation with a principal place of business in San Francisco, California, as well as future copyleft versions of that license published by that same organization. "Incorporate" means to publish or republish a Document, in whole or in part, as part of another Document. An MMC is "eligible for relicensing" if it is licensed under this License, and if all works that were first published under this License somewhere other than this MMC, and subsequently incorporated in whole or in part into the MMC, (1) had no cover texts or invariant sections, and (2) were thus incorporated prior to November 1, 2008. 
The operator of an MMC Site may republish an MMC contained in the site under CC-BY-SA on the same site at any time before August 1, 2009, provided the MMC is eligible for relicensing. ADDENDUM: How to use this License for your documents To use this License in a document you have written, include a copy of the License in the document and put the following copyright and license notices just after the title page: Copyright (c) YEAR YOUR NAME. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included in the section entitled "GNU Free Documentation License". If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts, replace the "with...Texts." line with this: with the Invariant Sections being LIST THEIR TITLES, with the Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST. If you have Invariant Sections without Cover Texts, or some other combination of the three, merge those two alternatives to suit the situation. If your document contains nontrivial examples of program code, we recommend releasing these examples in parallel under your choice of free software license, such as the GNU General Public License, to permit their use in free software. 
================================================ FILE: doc/Makefile ================================================ # If you have pdflatex installed, type "make" to create the # documentation, "make clean" to delete it documentation: adept_documentation.pdf adept_reference.pdf adept_documentation.pdf: adept_documentation.tex pdflatex adept_documentation.tex pdflatex adept_documentation.tex pdflatex adept_documentation.tex adept_reference.pdf: adept_reference.tex pdflatex adept_reference.tex clean: rm -f adept_documentation.pdf adept_reference.pdf .PHONY: documentation clean ================================================ FILE: doc/README ================================================ This directory contains the LaTeX source files for the Adept User Guide and Adept Reference Sheet. Type "make" to create the corresponding PDF files (using pdflatex), and "make clean" to delete them. Permission is granted to copy, distribute and/or modify the Adept User Guide and Adept Reference Sheet under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation. This license may be found at http://www.gnu.org/copyleft/fdl.html, and in this directory in the "COPYING" file. As an exception, no copyright is asserted for the code fragments in the document (indicated in the text with a light-grey background); these code fragments are in the Public Domain and may be copied, modified and distributed without restriction. ================================================ FILE: doc/adept_documentation.tex ================================================ % % Adept automatic differentiation library for C++: User guide % % Type "pdflatex adept_documentation.tex" twice to recreate the PDF % file (or type "make" in this directory).
% % Permission is granted to copy, distribute and/or modify this % document under the terms of the GNU Free Documentation License, % Version 1.3 or any later version published by the Free Software % Foundation. This license may be found at % http://www.gnu.org/copyleft/fdl.html, and in this directory in the % "COPYING" file. As an exception, no copyright is asserted for the % code fragments in this document (indicated in the text with a % light-grey background); these code fragments are in the Public % Domain and may be copied, modified and distributed without % restriction. \documentclass[a4,oneside]{book} \usepackage[colorlinks=true,linkcolor=blue,citecolor=blue]{hyperref} \usepackage{natbib} \usepackage{times} \usepackage{listings} \usepackage{xcolor} \usepackage{color} \usepackage{marginnote} \usepackage{rotating} \usepackage{mdframed,lipsum} \newmdenv[ leftmargin = 0pt, innerleftmargin = 1em, innertopmargin = 0pt, innerbottommargin = 0pt, innerrightmargin = 0pt, rightmargin = 0pt, linewidth = 1pt, topline = false, rightline = false, bottomline = false ]{leftbar} % Set math in Times Roman \DeclareSymbolFont{letters}{OML}{ptmcm}{m}{it} \DeclareSymbolFont{operators}{OT1}{ptmcm}{m}{n} % Page set up \setlength{\oddsidemargin}{0cm} %{0.5cm} \setlength{\evensidemargin}{0cm} %{0.5cm} \setlength{\topmargin}{-2cm} \setlength{\textheight}{24cm} \setlength{\textwidth}{16cm} \setlength{\marginparsep}{0.5cm} \setlength{\marginparwidth}{0cm} \setlength{\parindent}{1em} \setlength{\parskip}{0cm} \renewcommand{\baselinestretch}{1.1} \sloppy % Configure appearance of code listings \definecolor{light-gray}{gray}{0.92} \def\codesize{\small} \lstset{language=C++, backgroundcolor=\color{light-gray}, numbersep=5pt, xleftmargin=0cm, xrightmargin=0cm, basicstyle=\footnotesize\ttfamily, 
emph={adouble,xdouble,Stack,adept,Array,FixedArray,Vector,aVector,aReal,Optimizable,Real,Minimizer,MinimizerStatus,Matrix,aMatrix,Array3D,aArray3D,intVector,boolVector,floatVector,floatMatrix,intMatrix,FortranArray,SpecialMatrix,SquareMatrix,aSquareMatrix,SymmMatrix,aSymmMatrix,UpperMatrix,LowerMatrix,IndexVector,adept_arrays,adept_optimize,adept_fortran}, emphstyle=\bfseries\color{red}} \lstset{showstringspaces=false} % Table-of-contents configuration \usepackage{tocloft} \setlength\cftparskip{-2pt} \setlength\cftbeforesecskip{1pt} \setlength\cftaftertoctitleskip{2pt} \renewcommand\cftsecfont{\normalfont} \renewcommand\cftsecpagefont{\normalfont} \renewcommand{\cftsecleader}{\cftdotfill{\cftsecdotsep}} \renewcommand\cftsecdotsep{\cftdot} \renewcommand\cftsubsecdotsep{\cftdot} % Page headers \usepackage{fancyhdr} \pagestyle{fancy} \renewcommand{\headrulewidth}{0.5pt} \renewcommand{\sectionmark}[1]{\markright{\thesection.\ #1}} \renewcommand{\subsectionmark}[1]{} \fancyhead[RO,RE]{\thepage} \fancyfoot[C]{} % Symbols and macros \def\x{\ensuremath{{\bf x}}} \def\y{\ensuremath{{\bf y}}} \def\H{\ensuremath{{\bf H}}} \def\T{\ensuremath{^\mathrm{T}}} \def\Adept{\emph{Adept}} \def\code#1{{\codesize\texttt{#1}}} \def\codebf#1{{\codesize\texttt{\textbf{#1}}}} \def\citem#1{\item[{\codesize\texttt{#1}}]} \def\codestyle#1{\texttt{#1}} \def\Offset{size\_t} \renewcommand\thefootnote{\relax} \def\cxx11{\marginpar{\rotatebox[origin=rb]{90}{\textbf{C++11 only~~~}}}} \reversemarginpar % Title material \title{\Adept\ C++ Software Library: User Guide} \author{Robin J. Hogan\\ \emph{European Centre for Medium Range Weather Forecasts, Reading, UK}\\ \emph{and School of Mathematical, Physical and Computational Sciences, University of Reading, UK,}} \date{Document version 2.1.3 (February 2024) applicable to \Adept\ version 2.1.3 \thanks{This document is copyright \copyright\ Robin J. Hogan 2013--2024. 
Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation. This license may be found at \url{http://www.gnu.org/copyleft/fdl.html}. As an exception, no copyright is asserted for the code fragments in this document (indicated in the text with a light-grey background); these code fragments are hereby placed in the Public Domain, and accordingly may be copied, modified and distributed without restriction.} \thanks{If you have any queries about \Adept\ that are not answered by this document or by the information on the \Adept\ web site (\url{http://www.met.reading.ac.uk/clouds/adept/}) then please email me at \href{mailto:r.j.hogan@ecmwf.int}{\texttt{r.j.hogan@ecmwf.int}}.}} \begin{document} \maketitle \tableofcontents \def\thefootnote{\fnsymbol{footnote}} \chapter{Introduction} \section{What is Adept?} \Adept\ (Automatic Differentiation using Expression Templates) is a C++ software library that enables algorithms to be automatically differentiated. Since version 2.0\footnote{Note that the version 1.9.x series served as beta releases for version 2.0 of \Adept.} it also provides array classes that can be used in array expressions. These two capabilities are fully integrated such that array expressions can be differentiated efficiently, but the array capability may also be used on its own. The automatic-differentiation capability uses an operator overloading approach, so very little code modification is required. Differentiation can be performed in forward mode (the ``tangent-linear'' computation), reverse mode (the ``adjoint'' computation), or the full Jacobian matrix can be computed. 
This behaviour is common to several other libraries, namely ADOL-C \citep{Griewank+1996}, CppAD \citep{Bell2007} and Sacado \citep{Gay2005}, but the use of expression templates, an efficient way to store the differential information and several other optimizations mean that reverse-mode differentiation tends to be significantly faster and use less memory. In fact, \Adept\ is also usually only a little slower than an adjoint code you might write by hand, but immeasurably faster in terms of user time; adjoint coding is very time consuming and error-prone. For technical details of how it works, benchmark results and further discussion of the factors affecting its speed when applied to a particular code, see \cite{Hogan2014}. Expression templates also underpin a number of libraries that provide the capability to perform mathematical operations on entire arrays \citep{Veldhuizen1995}. Unfortunately, if \Adept\ version 1.x and such an array library are used together, then the speed advantages of expression templates are lost, if indeed the libraries work together at all. Since version 2.0, \Adept\ provides array classes that overcome this problem: its automatic differentiation and array capabilities are underpinned by a single unified expression template framework so that array expressions may be differentiated very efficiently. However, it should be stressed that \Adept\ is useful as a fully functional array library even if you don't wish to use its automatic differentiation capability. \Adept\ uses BLAS and LAPACK for matrix operations. This user guide describes how to apply the \Adept\ software library to your code, and many of the examples map on to those in the \code{test} directory of the \Adept\ software package. Section \ref{sec:installing} outlines how to install \Adept\ on your system and how to compile your own code to use it. 
Chapter \ref{chap:ad} describes how to use the automatic differentiation capability of the library, chapter \ref{chap:arrays} its array capability and chapter \ref{chap:optimize} its optimization capability. Chapter \ref{chap:gen} then describes general aspects such as exception handling, configuration options and license terms. \section{Installing \Adept\ and compiling your code to use it} \label{sec:installing} \Adept\ should work with any C++98 compliant compiler, but uses some C++11 features if compiled with support for this later standard. Most of the testing has been on Linux with the GNU C++ compiler, but it also compiles on Linux with the Clang and Intel compilers and on Windows with the Microsoft compiler. The code is built with the help of a \code{configure} shell script generated by GNU autotools. If you are on a non-Unix system (e.g.\ Windows) and cannot use shell scripts, see section \ref{sec:non-unix}. \subsection{Unix-like platforms} \label{sec:unix} On a Unix-like system, do the following: \begin{enumerate} \item Install the BLAS library to enable matrix multiplication. For the best performance in matrix operations it is recommended that you install an optimized package such as OpenBLAS\footnote{OpenBLAS is available from \url{http://www.openblas.net/}.} or ATLAS\footnote{ATLAS is available from \url{http://math-atlas.sourceforge.net/}.}. If you have multiple BLAS libraries available on your system you can specify the one you want by calling the \code{configure} script below with \code{--with-blas=openblas} or similar. If \Adept\ is compiled without BLAS support then matrix multiplication will fail at run time. \item Optionally install the LAPACK library, necessary for matrix inversion and solving linear systems of equations. If you do not install this then \Adept\ will still compile but the functions \code{inv} and \code{solve} will fail at run time. Note that LAPACK relies on the underlying BLAS library for its speed. 
\item The test and benchmarking programs can make use of additional libraries if available. If you also install any of the automatic differentiation tools ADOL-C, CppAD and/or Sacado then the benchmarking test program can compare them to \Adept. One of the test programs uses the minimization algorithm from the GNU Scientific Library, if available, so you may wish to install that too. \item Unpack the package (\code{tar xvfz adept-2.x.tar.gz} on Linux) and \code{cd} to the directory \code{adept-2.x}. \item Configure the build using the \code{configure} script. The most basic method is to just run \begin{lstlisting} ./configure \end{lstlisting} More likely you will wish to compile with a higher level of optimization than the default (which is \code{-O2}), achieved by setting the environment variable \code{CXXFLAGS}. You may also wish to specify the root directory of the installation, say to \code{/foo}. These may be done by running instead \begin{lstlisting} ./configure CXXFLAGS="-g -O3" --prefix=/foo \end{lstlisting} The \code{-g} option to \code{CXXFLAGS} ensures debugging information is stored. If you use the GNU compiler then consider the \code{-g1} option instead to reduce the amount of debugging information stored. The GNU \code{-march=native} option will also enable the fastest instruction set for the machine on which the code is being compiled. \Adept\ can vectorize certain floating-point array expressions making use of the SSE2, AVX and AVX512 instruction sets on Intel hardware and the NEON instruction set on 64-bit ARM. If a library you wish to use is installed in a non-system directory, say under \code{/foo}, then specify the locations as follows: \begin{lstlisting} ./configure CPPFLAGS="-I/foo/include" LDFLAGS="-L/foo/lib -Wl,-rpath,/foo/lib" \end{lstlisting} where the \code{-rpath} business is needed in order that the \Adept\ shared library knows where to look for the libraries it is dependent on. 
If you have them then for the benchmarking program you can also add the non-system location of ADOL-C, CppAD and Sacado libraries with additional \code{-I} and \code{-L} arguments, but note that the \code{-rpath} argument is not needed in that case. You can see the more general options available by running \code{./configure --help}; for example, you can turn off OpenMP parallelization in the computation of Jacobian matrices using \code{--disable-openmp}. See also section \ref{sec:configuring} for ways to make more fundamental changes to the configuration of \Adept. The output from the \code{configure} script provides information on aspects of how \Adept\ and the test programs will be built. \item Build \Adept\ by running \begin{lstlisting} make \end{lstlisting} This will create the static and shared libraries in \code{adept/.libs}. \item Install the header files and the static and shared libraries by running \begin{lstlisting} make install \end{lstlisting} If this is to be installed to a system directory, you will need to log in as the super-user first, or run \code{sudo make install}, depending on your system. \item Build and run the test programs by running \begin{lstlisting} make check \end{lstlisting} Note that this may be done without first installing the \Adept\ library to a system directory. This compiles a number of test programs in the \code{test} directory and runs them one by one; if any fail due to an incorrect result then \code{make check} will fail. % The \code{make check} operation also compiles \code{autodiff\_benchmark} in the \code{benchmark} directory for comparing the speed of the differentiation of two advection algorithms using \Adept, ADOL-C, CppAD and Sacado (or whichever subset of these tools you have on your system). It also compiles \code{animate} for visualizing at a terminal what the algorithms are doing. Further information on running these programs can be found in the \code{README} files in the relevant directories.
\end{enumerate} % The test programs in the \code{test} directory are as follows: % \begin{enumerate} \item\code{test\_adept}: compares the results of numerical and automatic differentiation. \item\code{test\_with\_without\_ad}: does the same but compiling the same source code both with and without automatic differentiation (see \code{test/Makefile} for how this is done). \item\code{test\_radiances}: demonstrates the interfacing of \Adept\ with code that provides its own Jacobian. \item\code{test\_gsl\_interface}: implementation of a simple minimization problem using the L-BFGS minimizer in the GSL library. \item\code{test\_misc}: the trivial example from \cite{Hogan2014}. \item\code{test\_checkpoint}: demonstration of checkpointing, a useful technique for large codes. \item\code{test\_thread\_safe}: demonstration of the use of multiple OpenMP threads, each with its own instance of an \Adept\ stack. \item\code{test\_no\_lib}: demonstrates the use of the \code{adept\_source.h} header file that means there is no need to link to the \Adept\ library in order to create an executable. \item\code{test\_arrays}, \code{test\_arrays\_active}, \code{test\_arrays\_active\_pausable}, \code{test\_complex\_arrays}: test many of the array capabilities described in chapter \ref{chap:arrays}. Each of these four executables is compiled from the same source file but with different compiler options in order to test the same array operations but with (a) passive arrays, (b) active arrays, (c) active arrays but with stack recording ``paused'' (see section \ref{sec:pausable}), and (d) complex arrays. \item\code{test\_array\_speed}: compares the speed of array operations versus the equivalent C-style \code{for} loop. \item\code{test\_radiances\_array}: as \code{test\_radiances} but demonstrates the use of \code{add\_derivative\_dependence} with array arguments.
\item\code{test\_fixed\_arrays}, \code{test\_fixed\_arrays\_active}: tests the functionality of arrays with fixed dimensions, i.e.\ those known at compile time. The two executables are compiled from the same source file, testing (a) passive arrays and (b) active arrays. \item\code{test\_constructors}: test the different ways of constructing, assigning and linking arrays, and passing them to and from functions. \item\code{test\_derivatives}: tests that all mathematical functions supported by \Adept\ differentiate correctly. \item\code{test\_array\_derivatives}: tests that selected array operations differentiate correctly. \item\code{test\_thread\_safe\_arrays}: tests two ways to ensure arrays may be accessed and subsetted safely in a multi-threaded environment. \item\code{test\_packet\_operations}: tests that Adept's use of Intel or ARM intrinsics to accelerate vector operations leads to identical output to the equivalent scalar code. \item\code{test\_fastexp}: tests the correctness of Adept's fast exponential function. \item\code{test\_reduce\_active}: tests the correctness of the differentiation of reduction operations (\code{sum}, \code{product}, \code{maxval} etc). \item\code{test\_minimizer}: tests Adept's minimization capabilities on the N-dimensional Rosenbrock banana function. Different dimensionality and minimization algorithms can be used, but by default the Levenberg-Marquardt minimizer is used with the 2-dimensional Rosenbrock function. \end{enumerate} To compile source files that use the \Adept\ library, you need to make sure that \code{adept.h} and \code{adept\_arrays.h} are in your include path. If they are located in a directory that is not in the default include path, add something like \code{-I/home/fred/include} to the compiler command line. At the linking stage, add \code{-ladept} to the command line to tell the linker to look for the \code{libadept.a} static library, or equivalent shared library. 
If this file is in a non-standard location, also add something like \code{-L/home/fred/lib -Wl,-rpath,/home/fred/lib} before the \code{-ladept} argument to specify its location. Section \ref{sec:multipleobjects} provides an example Makefile for compiling code that uses the \Adept\ library. Read on to see how you can compile an \Adept\ application \emph{without} needing to link to a library. \subsection{Non-Unix platforms, and compiling \Adept\ applications without linking to an external library} \label{sec:non-unix} Most of the difficulty in maintaining software that can compile on multiple platforms arises from the different ways of compiling software libraries, and the need to test on compilers that may be proprietary. Unfortunately I don't have the time to maintain versions of \Adept\ that build specifically on Microsoft Windows or other non-Unix platforms. However, \Adept\ is not a large library, so I have provided a very simple way to build an \Adept\ application \emph{without} the need to link to a pre-compiled \Adept\ library. In one of your source files and one only, add this near the top: \begin{lstlisting} #include <adept_source.h> \end{lstlisting} Typically you would include this in the source file containing the \code{main} function. This header file is simply a concatenation of the \Adept\ library source files, so when you compile a file that includes it, you compile in all the functionality of the \Adept\ library. All other source files in your application should include only the \code{adept.h} or \code{adept\_arrays.h} header file as normal. When you link all your object files together to make an executable, the \Adept\ functionality will be built in, even though you did not link to an external \Adept\ library.
By default, \code{adept\_arrays.h} does not enable BLAS (needed for matrix multiplication) or LAPACK (needed for matrix inversion and solving linear systems of equations); to enable either BLAS alone, or both BLAS and LAPACK, uncomment the lines near the top of \code{adept\_source.h} defining \code{HAVE\_BLAS} and \code{HAVE\_LAPACK}, and link against functioning BLAS and LAPACK libraries. A demonstration of the use of \code{adept\_source.h} is in the \code{test/test\_no\_lib.cpp} source file, which needs to be compiled together with \code{test/algorithm.cpp} to make an executable. % It is hoped that this feature will make it easy to use \Adept\ on non-Unix platforms, although of course this feature works just as well on Unix-like platforms. % If you want to use OpenBLAS on such %platforms then you will still need to install that library in the %normal way.% A further point to note is that, under the terms of the license, it is permitted to copy all the \Adept\ include files, including \code{adept\_source.h}, into an include directory in your software package and use it from there in both binary and source-code releases of your software. This means that users do not need to install \Adept\ separately before they use your software. However, if you do this then remember that your use of these files must comply with the terms of the Apache License, Version 2.0; see section \ref{sec:license} for details. % \chapter{Using \Adept\ for automatic differentiation} \label{chap:ad} % \section{Introduction} \label{sec:ad_functionality} This chapter describes how to use \Adept\ to differentiate your code. For simplicity, none of the examples use array functionality described in the next chapter.
\Adept\ provides the following automatic-differentiation functionality: % \begin{description} \item[Full Jacobian matrix] Given the non-linear function $\y=f(\x)$ relating vector $\y$ to vector $\x$ coded in C or C++, after a little code modification \Adept\ can compute the Jacobian matrix $\H=\partial\y/\partial\x$, where the element at row $i$ and column $j$ of $\H$ is $H_{i,j}=\partial y_i/\partial x_j$. This matrix will be computed much more rapidly and accurately than if you simply recompute the function multiple times, each time perturbing a different element of $\x$ by a small amount. The Jacobian matrix is used in the Gauss-Newton and Levenberg-Marquardt minimization algorithms. \item[Reverse-mode differentiation] This is a key component in optimization problems where a non-linear function needs to be minimized but the state vector $\x$ is too large for it to make sense to compute the full Jacobian matrix. Atmospheric data assimilation is the canonical example in the field of meteorology. Given a non-linear function $J(\x)$ relating the scalar to be minimized $J$ to vector $\x$, \Adept\ will compute the vector of adjoints $\partial J/\partial\x$. Moreover, for a component of the code that may be expressed as a multi-dimensional non-linear function $\y=f(\x)$, \Adept\ can compute $\partial J/\partial\x$ if it is provided with the vector of input adjoints $\partial J/\partial\y$. In this case, $\partial J/\partial\x$ is equal to the matrix-vector product $\H\T\partial J/\partial\y$, but it is computed here without computing the full Jacobian matrix $\H$. The vector $\partial J/\partial\x$ may then be used in a quasi-Newton minimization scheme \cite[e.g.,][]{Liu+1989}. \item[Forward-mode differentiation] Given the non-linear function $\y=f(\x)$ and a vector of perturbations $\delta\x$, \Adept\ will compute the corresponding vector $\delta\y$ arising from a linearization of the function $f$. 
Formally, $\delta\y$ is equal to the matrix-vector product $\H\delta\x$, but it is computed here without computing the full Jacobian matrix $\H$. Note that \Adept\ is designed for the reverse case, so might not be as fast or economical in memory in the forward mode as libraries written especially for that purpose (although Hogan, 2014, showed that it was competitive). \end{description}% % \Adept\ can automatically differentiate the following operators and functions: \begin{itemize} \item The standard binary mathematical operators \code{+}, \code{-}, \code{*} and \code{/}. \item The assignment versions of these operators: \code{+=}, \code{-=}, \code{*=} and \code{/=}. \item The unary mathematical functions \code{sqrt}, \code{exp}, \code{log}, \code{log10}, \code{sin}, \code{cos}, \code{tan}, \code{asin}, \code{acos}, \code{atan}, \code{sinh}, \code{cosh}, \code{tanh}, \code{abs}, \code{asinh}, \code{acosh}, \code{atanh}, \code{expm1}, \code{log1p}, \code{cbrt}, \code{erf}, \code{erfc}, \code{exp2}, \code{log2}, \code{round}, \code{trunc}, \code{rint} and \code{nearbyint}, \item The binary functions \code{pow}, \code{atan2}, \code{min}, \code{max}, \code{fmin} and \code{fmax}. \end{itemize} Variables to take part in expressions to be differentiated have a special ``active'' type; such variables can take part in comparison operations \code{==}, \code{!=}, \code{>}, \code{<}, \code{>=} and \code{<=}, as well as the diagnostic functions \code{isfinite}, \code{isinf} and \code{isnan}. Note that at present \Adept\ is missing some functionality that you may require: \begin{itemize} \item Differentiation is first-order only: it cannot directly compute higher-order derivatives such as the Hessian matrix, although section \ref{sec:optimize} describes how \Adept\ can help compute the approximate Hessian if the cost function (also known as the penalty function or objective function) is in a particular commonly used form. 
\item It has limited support for complex numbers; no support for mathematical functions of complex numbers, and expressions involving operations (addition, subtraction, multiplication and division) on complex numbers are not optimized. \item It can be applied to C and C++ only; \Adept\ could not be written in Fortran since the language provides no template capability. \end{itemize}% % It is hoped that future versions will remedy these limitations (and maybe even a future version of Fortran will support templates). Section \ref{sec:preparation} describes how to prepare your code for automatic differentiation, and section \ref{sec:adjoint} describes how to perform forward- and reverse-mode automatic differentiation on this code. Section \ref{sec:jacobian} describes how to compute Jacobian matrices. Section \ref{sec:realworld} provides a detailed description of how to interface an algorithm implemented using \Adept\ with a third-party minimization library. Section \ref{sec:withwithout} describes how to call a function both with and without automatic differentiation from within the same program. Section \ref{sec:interfacehandcoded} describes how to interface to software modules that compute their own Jacobians. Section \ref{sec:stack} describes the user-oriented member functions of the \code{Stack} class that contains the differential information and section \ref{sec:adouble} describes the member functions of the ``active'' double-precision type \code{adouble}. \section{Code preparation} \label{sec:preparation} If you have used ADOL-C, CppAD or Sacado then you will already be familiar with what is involved in applying an operator-overloading automatic differentiation package to your code. The user interface to \Adept\ differs from these only in the detail. It is assumed that you have an algorithm written in C or C++ that you wish to differentiate. 
This section deals with the modifications needed to your code, while section \ref{sec:adjoint} describes the small additional amount of code you need to write to differentiate it. In all source files containing code to be differentiated, you need to include the \code{adept.h} header file and import the \code{adouble} type from the \code{adept} namespace. Assuming your code uses double precision, you then search and replace \code{double} with the ``active'' equivalent \code{adouble}, but doing this only for those variables whose values depend on the independent input variables. Under the hood this type is an alias for \code{Active<double>}. The single-precision equivalent is \code{afloat}, an alias for \code{Active<float>}. Active and passive variables of single and double precision may be used together in the same expressions, but note that by default all differentiation is done in double precision. If you wish to enable your code to be easily recompiled to use different precisions, then you may alternatively use the generic \code{Real} type from the \code{adept} namespace with its active equivalent \code{aReal} (an alias for \code{Active<Real>}). Section \ref{sec:configuring} describes how to redefine \code{Real} to represent single, double or quadruple precision. Automatic differentiation will be performed using the same precision as \code{Real}, but be aware that if this is defined to be the same as a single-precision \code{float}, accumulation of round-off error can make the accuracy of derivatives insufficient for minimization algorithms. The examples in the remainder of this chapter use only double precision.
Consider the following contrived algorithm from \cite{Hogan2014} that takes two inputs and returns one output: \begin{lstlisting} double algorithm(const double x[2]) { double y = 4.0; double s = 2.0*x[0] + 3.0*x[1]*x[1]; y *= sin(s); return y; } \end{lstlisting} \noindent The modified code would look like this: \begin{lstlisting} #include <adept.h> using adept::adouble; adouble algorithm(const adouble x[2]) { adouble y = 4.0; adouble s = 2.0*x[0] + 3.0*x[1]*x[1]; y *= sin(s); return y; } \end{lstlisting} \noindent Changes like this need to be done in all source files that form part of an algorithm to be differentiated. If you need to access the real number underlying an \code{adouble} variable \code{a}, for example in order to use it as an argument to the \code{fprintf} function, then use \code{a.value()} or \code{adept::value(a)}. Any mathematical operations performed on this real number will not be differentiated. You may use \code{adouble} as the template argument of a Standard Template Library (STL) vector type (i.e. \code{std::vector\textless adouble\textgreater}), or indeed any container where you access individual elements one by one. For types allowing mathematical operations on the whole object, such as the STL \code{complex} and \code{valarray} types, you will find that although you can multiply one \code{std::complex\textless adouble\textgreater} or \code{std::valarray\textless adouble\textgreater} object by another, mathematical functions (\code{exp}, \code{sin} etc.) will not work when applied to whole objects, and neither will some simple operations such as multiplying these types by an ordinary (non-active) \code{double} variable. Moreover, the performance is not great because expressions cannot be fully optimized when in these containers. Therefore, if you need array functionality then you should use the features described in chapter \ref{chap:arrays}. It is hoped that a future version of \Adept\ will include its own complex type.
\section{Applying reverse-mode differentiation} \label{sec:adjoint} Suppose you wanted to create a version of \code{algorithm} that returned not only the result but also the gradient of the result with respect to its inputs, you would do this: \begin{lstlisting} #include <adept.h> double algorithm_and_gradient( const double x_val[2], // Input values double dy_dx[2]) { // Output gradients adept::Stack stack; // Where the derivative information is stored using adept::adouble; // Import adouble from adept adouble x[2] = {x_val[0], x_val[1]}; // Initialize active input variables stack.new_recording(); // Start recording adouble y = algorithm(x); // Call version overloaded for adouble args y.set_gradient(1.0); // Defines y as the cost function stack.compute_adjoint(); // Run the adjoint algorithm dy_dx[0] = x[0].get_gradient(); // Store the first gradient dy_dx[1] = x[1].get_gradient(); // Store the second gradient return y.value(); // Return the result of the simple computation } \end{lstlisting} % The component parts of this function are in a specific order, and if this order is violated then the code will not run correctly. The steps are now described. % \subsection{Set-up stack to record derivative information} \label{sec:stack_setup} \begin{lstlisting} adept::Stack stack; \end{lstlisting} The \code{Stack} object is where the differential version of the algorithm will be stored. When initialized, it makes itself accessible to subsequent statements via a global variable, but using thread-local storage to ensure thread safety. \emph{It must be initialized before the first \code{adouble} object is instantiated and it must not go out of scope until the last \code{adouble} object is destructed.} This is because \code{adouble} objects register themselves with the currently active stack, and deregister themselves when they are destroyed; if the same stack is not active throughout the lifetime of such \code{adouble} objects then the code will crash with a segmentation fault.
In the example here, the \code{Stack} object is local to the scope of the function. If another \code{Stack} object had been initialized by the calling function and so was active at the point of entry to the function, then the local \code{Stack} object would throw an \code{adept::stack\_already\_active} exception. See Test 3 described at \code{test/README} in the \Adept\ package if you want to use multiple \code{Stack} objects in the same program: the relevant source code is in \code{test/simulate\_radiances.cpp}, which temporarily deactivates the existing \code{Stack} objects in order that the local one can run. A disadvantage of local \code{Stack} objects is that the memory they use must be reallocated each time the function is called. This can be overcome in several ways: \begin{itemize} \item Declare the \code{Stack} object to be \code{static}, which means that it will persist between function calls. This has the disadvantage that you won't be able to use other \code{Stack} objects in the program without deactivating this one first (see \code{test\_radiances} in the \Adept\ package, referred to above, for how to do this). \item Initialize \code{Stack} at a higher level in the program. If you need access to the stack, you may either pass a reference to it to functions such as \code{algorithm\_and\_gradient}, or alternatively you can use the \code{adept::active\_stack()} function to return a pointer to the currently active stack object. \item Put it in a class so that it is accessible to member functions; this approach is demonstrated in section \ref{sec:realworld}. \end{itemize} % \subsection{Initialize independent variables and start recording} \begin{lstlisting} adouble x[2] = {x_val[0], x_val[1]}; stack.new_recording(); \end{lstlisting} The first line here simply copies the input values to the algorithm into \code{adouble} variables.
These are the \emph{independent variables}, but note that there is no obligation for these to be stored as one array (as in CppAD), and for forward- and reverse-mode automatic differentiation you do not need to tell \Adept\ explicitly via a function call which variables are the independent ones. The next line clears all differential statements from the stack so that it is ready for a new recording of differential information. % Note that the first line here actually stores two differential statements, $\delta$\code{x[0]=0} and $\delta$\code{x[1]=0}, which are immediately cleared by the \code{new\_recording} function call. To avoid the small overhead of storing redundant information on the stack, we could replace the first line with \begin{lstlisting} x[0].set_value(x_val[0]); x[1].set_value(x_val[1]); \end{lstlisting} or \begin{lstlisting} adept::set_values(x, 2, x_val); \end{lstlisting} which have the effect of setting the values of \code{x} without storing the equivalent differential statements. Previous users of \Adept\ version 0.9 should note that since version 1.0, the \code{new\_recording} function replaces the \code{start} function call, which had to be put \emph{before} the independent variables were initialized. The problem with this was that the independent variables had to be initialized with the \code{set\_value} or \code{set\_values} functions, otherwise the gradients coming out of the automatic differentiation would all be zero. Since it was easy to forget this, \code{new\_recording} was introduced to allow the independent variables to be assigned in the normal way using the assignment operator (\code{=}). But don't just replace \code{start} in your version-0.9-compatible code with \code{new\_recording}; the latter must appear \emph{after} the independent variables have been initialized. 
\subsection{Perform calculations to be differentiated} \begin{lstlisting} adouble y = algorithm(x); \end{lstlisting} The algorithm is called, and behind the scenes the equivalent differential statement for every mathematical statement is stored in the stack. The result of the forward calculation is stored in \code{y}, known as a dependent variable. This example has one dependent variable, but any number is allowed, and they could be returned in another way, e.g. by passing a non-constant array to algorithm that is filled with the final values when the function returns. % \subsection{Perform reverse-mode differentiation} \begin{lstlisting} y.set_gradient(1.0); stack.compute_adjoint(); \end{lstlisting} The first line sets the initial gradient (or adjoint) of \code{y}. In this example, we want the output gradients to be the derivatives of \code{y} with respect to each of the independent variables; to achieve this, the initial gradient of \code{y} must be unity. More generally, if \code{y} was only an intermediate value in the computation of cost function $J$, then for the outputs of the function to be the derivatives of $J$ with respect to each of the independent variables, we would need to set the gradient of \code{y} to $\partial J/\partial$\code{y}. In the case of multiple intermediate values, a separate call to \code{set\_gradient} is needed for each intermediate value. If \code{y} was an array of length \code{n} then the gradient of each element could be set to the values in a \code{double} array \code{y\_ad} using \begin{lstlisting} adept::set_gradients(y, n, y_ad); \end{lstlisting} The \code{compute\_adjoint()} member function of stack performs the adjoint calculation, sweeping in reverse through the differential statements stored on the stack. Note that this must be preceded by at least one \code{set\_gradient} or \code{set\_gradients} call, since the first such call initializes the list of gradients for \code{compute\_adjoint()} to act on. 
Otherwise, \code{compute\_adjoint()} will throw a \code{gradients\_not\_initialized} exception. \subsection{Extract the final gradients} \begin{lstlisting} dy_dx[0] = x[0].get_gradient(); dy_dx[1] = x[1].get_gradient(); \end{lstlisting} These lines simply extract the gradients of the cost function with respect to the two independent variables. Alternatively we could have extracted them simultaneously using \begin{lstlisting} adept::get_gradients(x, 2, dy_dx); \end{lstlisting} To do forward-mode differentiation in this example would involve setting the initial gradients of \code{x} instead of \code{y}, calling the member function \code{compute\_tangent\_linear()} instead of \code{compute\_adjoint()}, and extracting the final gradients from \code{y} instead of \code{x}. \section{Computing Jacobian matrices} \label{sec:jacobian} Until now we have considered a function with two inputs and one output. Consider the following more general function whose declaration is \begin{lstlisting} void algorithm2(int n, const adouble* x, int m, adouble* y); \end{lstlisting} where \code{x} points to the \code{n} independent (input) variables and \code{y} points to the \code{m} dependent (output) variables. 
The following function would return the full Jacobian matrix: % \begin{lstlisting} #include <vector> #include <adept.h> void algorithm2_jacobian( int n, // Number of input values const double* x_val, // Input values int m, // Number of output values double* y_val, // Output values double* jac) { // Output Jacobian matrix using adept::adouble; // Import Stack and adouble from adept adept::Stack stack; // Where the derivative information is stored std::vector<adouble> x(n); // Vector of active input variables adept::set_values(&x[0], n, x_val); // Initialize adouble inputs stack.new_recording(); // Start recording std::vector<adouble> y(m); // Create vector of active output variables algorithm2(n, &x[0], m, &y[0]); // Run algorithm stack.independent(&x[0], n); // Identify independent variables stack.dependent(&y[0], m); // Identify dependent variables stack.jacobian(jac); // Compute & store Jacobian in jac for (int iy = 0; iy < m; ++iy) y_val[iy] = y[iy].value(); // Extract value from active object } \end{lstlisting} % Note that: \begin{itemize} \item The \code{independent} member function of stack is used to identify the independent variables, i.e.\ the variables that the derivatives in the Jacobian matrix will be with respect to. In this example there are \code{n} independent variables located together in memory and so can be identified all at once. Multiple calls are possible to identify further independent variables. To identify a single independent variable, call \code{independent} with just one argument, the independent variable (not as a pointer). \item The \code{dependent} member function of stack identifies the dependent variables, and its usage is identical to \code{independent}. \item The memory provided to store the Jacobian matrix (pointed to by \code{jac}) must be a one-dimensional array of size \code{m}$\times$\code{n}, where \code{m} is the number of dependent variables and \code{n} is the number of independent variables. 
\item The resulting matrix is stored in the sense of the index representing the dependent variables varying fastest (column-major order). % To get row-major order, call the \code{jacobian} function % with a second argument of \code{true} (see section \ref{sec:stack}). \item Internally, the Jacobian calculation is performed by multiple forward or reverse passes, whichever would be faster (dependent on the numbers of independent and dependent variables). \item The use of \code{std::vector} rather than \code{new adouble[n]} ensures no memory leaks in the case of an exception being thrown, since the memory associated with \code{x} and \code{y} will be automatically deallocated when they go out of scope. \end{itemize}% As described in chapter \ref{chap:arrays}, \Adept\ version 2.0 introduced built-in multi-dimensional arrays of both active (e.g.\ \code{aVector}) and passive (e.g.\ \code{Vector}) variables. It therefore seems more natural to express the algorithm above in terms of these objects, which could be done as follows: \begin{lstlisting} #include <adept_arrays.h> // Adept vectors know their own length, so lengths do not need to be // passed in as well adept::aVector algorithm2(const adept::aVector& x); void algorithm2_jacobian( const adept::Vector& x_val, // Input values adept::Vector& y_val, // Output values (correctly sized or empty) adept::Matrix& jac) { // Output Jacobian matrix (correctly sized) adept::Stack stack; // Where the derivative information is stored adept::aVector x = x_val; // Active vector of inputs stack.new_recording(); // Start recording adept::aVector y = algorithm2(x); // Run algorithm and store outputs stack.independent(x); // Identify independent variables stack.dependent(y); // Identify dependent variables stack.jacobian(jac); // Compute & store Jacobian (since Adept 2.0.8) // If jac is empty we can automatically resize it using this instead (since Adept 2.0.8): //jac = stack.jacobian(); y_val = value(y); // Extract the values from the active array } 
\end{lstlisting} \section{Real-world usage: interfacing \Adept\ to a third-party minimization library} \label{sec:realworld} Suppose we want to find the vector $\x$ that minimizes a cost function $J(\x)$ that consists of a large algorithm coded using the \Adept\ library and encapsulated within a C++ class. In this section we illustrate how it may be interfaced to a third-party minimization algorithm with a C-style interface, specifically the free one in the GNU Scientific Library. Note that since version 2.0.8, \Adept\ provides its own minimization functionality, as described in chapter \ref{chap:optimize}. The full working version of this example, using the N-dimensional Rosenbrock banana function as the function to be minimized, is in \code{test/test\_gsl\_interface.cpp} of the \Adept\ software package (see the description of Test 4 in \code{test/README}). The interface to the algorithm is as follows: % \begin{lstlisting} #include <vector> #include <adept.h> using adept::adouble; class State { public: // Construct a state with n state variables State(int n) { active_x_.resize(n); x_.resize(n); } // Minimize the function, returning true if minimization successful, false otherwise bool minimize(); // Get copy of state variables after minimization void x(std::vector<double>& x_out) const; // For input state variables x, compute the function J(x) and return it double calc_function_value(const double* x); // For input state variables x, compute function and put its gradient in dJ_dx double calc_function_value_and_gradient(const double* x, double* dJ_dx); // Return the size of the state vector unsigned int nx() const { return active_x_.size(); } private: // Active version: the algorithm is contained in the definition of this function adouble calc_function_value(const adouble* x); // DATA adept::Stack stack_; // Adept stack object (must be before active state // variables, e.g. 
adouble, in class definition) std::vector active_x_; // Active state variables (must be after Stack) }; \end{lstlisting} % The algorithm itself is contained in the definition of \code{calc\_function\_value(const adouble*)}, which is implemented using \code{adouble} variables (following the rules in section \ref{sec:preparation}). However, the public interface to the class uses only standard \code{double} types, so the use of \Adept\ is hidden to users of the class. Of course, a complicated algorithm may be implemented in terms of multiple classes that do exchange data via \code{adouble} objects. We will be using a quasi-Newton minimization algorithm that calls the algorithm many times with trial vectors $\x$, and for each call may request not only the value of the function, but also its gradient with respect to $\x$. Thus the public interface provides \code{calc\_function\_value(const double*)} and \code{calc\_function\_value\_and\_gradient}, which could be implemented as follows: % \begin{lstlisting} double State::calc_function_value(const double* x) { for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i]; stack_.new_recording(); return value(calc_function_value(&active_x_[0])); } double State::calc_function_value_and_gradient(const double* x, double* dJ_dx) { for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i]; stack_.new_recording(); adouble J = calc_function_value(&active_x_[0]); J.set_gradient(1.0); stack_.compute_adjoint(); adept::get_gradients(&active_x_[0], nx(), dJ_dx); return value(J); } \end{lstlisting} % The first function simply copies the \code{double} inputs into an \code{adouble} vector and runs the version of \code{calc\_function\_value} for \code{adouble} arguments. Obviously there is an inefficiency here in that gradients are recorded that are then not used, and this function would be typically 2.5--3 times slower than an implementation of the algorithm that did not store gradients. 
Section \ref{sec:withwithout} describes three ways to overcome this problem. The second function above implements reverse-mode automatic differentiation as described in section \ref{sec:adjoint}. The \code{minimize} member function could be implemented using GSL as follows: % \begin{lstlisting} #include #include bool State::minimize() { // Minimizer settings const double initial_step_size = 0.01; const double line_search_tolerance = 1.0e-4; const double converged_gradient_norm = 1.0e-3; // Use the "limited-memory BFGS" quasi-Newton minimizer const gsl_multimin_fdfminimizer_type* minimizer_type = gsl_multimin_fdfminimizer_vector_bfgs2; // Declare and populate structure containing function pointers gsl_multimin_function_fdf my_function; my_function.n = nx(); my_function.f = my_function_value; my_function.df = my_function_gradient; my_function.fdf = my_function_value_and_gradient; my_function.params = reinterpret_cast(this); // Set initial state variables using GSL's vector type gsl_vector *x; x = gsl_vector_alloc(nx()); for (unsigned int i = 0; i < nx(); ++i) gsl_vector_set(x, i, 1.0); // Configure the minimizer gsl_multimin_fdfminimizer* minimizer = gsl_multimin_fdfminimizer_alloc(minimizer_type, nx()); gsl_multimin_fdfminimizer_set(minimizer, &my_function, x, initial_step_size, line_search_tolerance); // Begin loop size_t iter = 0; int status; do { ++iter; // Perform one iteration status = gsl_multimin_fdfminimizer_iterate(minimizer); // Quit loop if iteration failed if (status != GSL_SUCCESS) break; // Test for convergence status = gsl_multimin_test_gradient(minimizer->gradient, converged_gradient_norm); } while (status == GSL_CONTINUE && iter < 100); // Free memory gsl_multimin_fdfminimizer_free(minimizer); gsl_vector_free(x); // Return true if successfully minimized function, false otherwise if (status == GSL_SUCCESS) { std::cout << "Minimum found after " << iter << " iterations\n"; return true; } else { std::cout << "Minimizer failed after " << iter << " 
iterations: " << gsl_strerror(status) << "\n"; return false; } } \end{lstlisting} % The GSL interface requires three functions to be defined, each of which takes a vector of state variables $\x$ as input: \code{my\_function\_value}, which returns the value of the function; \code{my\_function\_gradient}, which returns the gradient of the function with respect to $\x$; and \code{my\_function\_value\_and\_gradient}, which returns the value and the gradient of the function. These functions are provided to GSL as function pointers (see above), but since GSL is a C library, we need to use the `\code{extern "C"}' specifier in their definition. Thus the function definitions would be: % \begin{lstlisting} extern "C" double my_function_value(const gsl_vector* x, void* params) { State* state = reinterpret_cast<State*>(params); return state->calc_function_value(x->data); } extern "C" void my_function_gradient(const gsl_vector* x, void* params, gsl_vector* gradJ) { State* state = reinterpret_cast<State*>(params); state->calc_function_value_and_gradient(x->data, gradJ->data); } extern "C" void my_function_value_and_gradient(const gsl_vector* x, void* params, double* J, gsl_vector* gradJ) { State* state = reinterpret_cast<State*>(params); *J = state->calc_function_value_and_gradient(x->data, gradJ->data); } \end{lstlisting} % When the \code{gsl\_multimin\_fdfminimizer\_iterate} function is called, it chooses a search direction and performs several calls of these functions to approximately minimize the function along this search direction. The \code{this} pointer (i.e.\ the pointer to the \code{State} object), which was provided to the \code{my\_function} structure in the definition of the \code{minimize} function above, is provided as the second argument to each of the three functions above. Unlike in C, in C++ this pointer needs to be cast back to a pointer to a \code{State} type, hence the use of \code{reinterpret\_cast}. That's it! 
A call to \code{minimize} should successfully minimize well behaved differentiable multi-dimensional functions. It should be straightforward to adapt the above to work with other minimization libraries. \section{Calling an algorithm with and without automatic differentiation from the same program} \label{sec:withwithout} The \code{calc\_function\_value(const double*)} member function defined in section \ref{sec:realworld} is sub-optimal in that it simply calls the \code{calc\_function\_value(const adouble*)} member function, which not only computes the value of the function, it also records the derivative information of all the operations involved. This information is then ignored. This overhead makes the function typically 2.5--3 times slower than it needs to be, although sometimes (specifically for loops containing no transcendental functions) the difference between an algorithm coded in terms of \code{double}s and the same algorithm coded in terms of \code{adouble}s can exceed a factor of 10 \citep{Hogan2014}. The impact on the computational speed of the entire minimization process depends on how many requests are made for the function value only as opposed to the gradient of the function, and can be significant. We require a way to avoid the overhead of \Adept\ computing the derivative information for calls to \code{calc\_function\_value(const double*)}, without having to maintain two versions of the algorithm, one coded in terms of \code{double}s and the other in terms of \code{adouble}s. The three ways to achieve this are now described. % \subsection{Function templates} \label{sec:func_templates} The simplest approach is to use a function template for those functions that take active arguments, as demonstrated in the following example: % \begin{lstlisting} #include <adept.h> class State { public: ... template <class xdouble> xdouble calc_function_value(const xdouble* x); ... 
}; // Example function definition that must be in a header file included // by any source file that calls calc_function_value template <class xdouble> inline xdouble State::calc_function_value(const xdouble* x) { xdouble y = 4.0; xdouble s = 2.0*x[0] + 3.0*x[1]*x[1]; y *= sin(s); return y; } \end{lstlisting} % This takes the example from section \ref{sec:preparation} and replaces \code{adouble} by the template type \code{xdouble}. Thus, \code{calc\_function\_value} can be called with either \code{double} or \code{adouble} arguments, and the compiler will compile inline the inactive or active version accordingly. Note that the function template need not be a member function of a class. This technique is good if only a small amount of code needs to be differentiated, but for large models the use of inlining is likely to lead to duplication of compiled code leading to large executables and long compile times. The following two approaches do not have this drawback and are suitable for large codes. \subsection{Pausable recording} \label{sec:pausable} The second method involves compiling the entire code with the \code{ADEPT\_RECORDING\_PAUSABLE} preprocessor variable defined, which can be done by adding an argument \code{-DADEPT\_RECORDING\_PAUSABLE} to the compiler command line. This modifies the behaviour of mathematical operations performed on \code{adouble} variables: instead of performing the operation and then storing the derivative information, it performs the operation and then only stores the derivative information if the \Adept\ stack is not in the ``paused'' state. 
We then use the following member function definition instead of the one in section \ref{sec:realworld}: % \begin{lstlisting} double State::calc_function_value(const double* x) { stack_.pause_recording(); for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i]; double J = value(calc_function_value(&active_x_[0])); stack_.continue_recording(); return J; } \end{lstlisting} % By pausing the recording for all operations on \code{adouble} objects, most of the overhead of storing derivative information is removed. The extra run-time check to see whether the stack is in the paused state, which is carried out by mathematical operations involving \code{adouble} objects, generally adds a small overhead. However, in algorithms where most of the number crunching occurs in loops containing no transcendental functions, even if the stack is in the paused state, the presence of the check can prevent the compiler from aggressively optimizing the loop. In that instance the third method may be preferable. % \subsection{Multiple object files per source file} \label{sec:multipleobjects} The third method involves compiling each source file containing functions with \code{adouble} arguments twice. The first time, the code is compiled normally to produce an object file containing compiled functions including automatic differentiation. The second time, the code is compiled with the \code{-DADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} flag on the compiler command line. This instructs the \code{adept.h} header file to turn off automatic differentiation by defining the \code{adouble} type to be an alias of the \code{double} type. This way, a second set of object files is created containing overloaded versions of the same functions as the first set but this time without automatic differentiation. These object files can be compiled together to form one executable. 
In the example presented in section \ref{sec:realworld}, the \code{calc\_function\_value} function would be one that would be compiled twice in this way, once to provide the \code{calc\_function\_value(const adouble*)} version and the other to provide the \code{calc\_function\_value(const double*)} version. Note that any functions that do not include \code{adouble} arguments must be compiled only once, because otherwise the linker will complain about multiple versions of the same function. The following shows a Makefile from a hypothetical project that compiles two source files (\code{algorithm1.cpp} and \code{algorithm2.cpp}) twice and a third (\code{main.cpp}) once: % \begin{lstlisting}[language=make] # Specify compiler and flags CXX = g++ CXXFLAGS = -Wall -O3 -g # Normal object files to be created OBJECTS = algorithm1.o algorithm2.o main.o # Object files created with no automatic differentiation NO_AD_OBJECTS = algorithm1_noad.o algorithm2_noad.o # Program name PROGRAM = my_program # Include-file location INCLUDES = -I/usr/local/include # Library location and name, plus the math library LIBS = -L/usr/local/lib -lm -ladept # Rule to build the program (typing "make" will use this rule) $(PROGRAM): $(OBJECTS) $(NO_AD_OBJECTS) $(CXX) $(CXXFLAGS) $(OBJECTS) $(NO_AD_OBJECTS) $(LIBS) -o $(PROGRAM) # Rule to build a normal object file (used to compile all objects in OBJECTS) %.o: %.cpp $(CXX) $(CXXFLAGS) $(INCLUDES) -c $< # Rule to build a no-automatic-differentiation object (used to compile ones in NO_AD_OBJECTS) %_noad.o: %.cpp $(CXX) $(CXXFLAGS) $(INCLUDES) -DADEPT_NO_AUTOMATIC_DIFFERENTIATION -c $< -o $@ \end{lstlisting} % There is a further modification required with this approach, which arises because if a header file declares both the \code{double} and \code{adouble} versions of a function, then when compiled with \code{-DADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} it appears to the compiler that the same function is declared twice, leading to a compile-time error. 
This can be overcome by using the preprocessor to hide the \code{adouble} version if the code is compiled with this flag, as follows (using the example from section \ref{sec:realworld}): % \begin{lstlisting} #include class State { public: ... double calc_function_value(const double* x); private: #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION adouble calc_function_value(const adouble* x); #endif ... }; \end{lstlisting} A final nuance is that if the code contains an \code{adouble} object \code{x}, then \code{x.value()} will work fine in the compilation when \code{x} is indeed of type \code{adouble}, but in the compilation when it is set to a simple \code{double} variable, the \code{value()} member function will not be found. Hence it is better to use \code{adept::value(x)}, which returns a \code{double} regardless of the type of \code{x}, and works regardless of whether the code was compiled with or without the \code{-DADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} flag. \section{Interfacing with software containing hand-coded Jacobians} \label{sec:interfacehandcoded} Often a complicated algorithm will include multiple components. Components of the code written in C or C++ for which the source is available are straightforward to convert to using \Adept, following the rules in section \ref{sec:preparation}. For components written in Fortran, this is not possible, but if such components have their own hand-coded Jacobian then it is possible to interface \Adept\ to them. More generally, in certain situations automatic differentiation is much slower than hand-coding \cite[see the Lax-Wendroff example in][]{Hogan2014} and we may wish to hand-code certain critical parts. In general the Jacobian matrix is quite expensive to compute, so this interfacing strategy makes most sense if the component of the algorithm has a small number of inputs or a small number of outputs. 
A full working version of the following example is given as ``Test 3'' in the \code{test} directory of the \Adept\ package (see specifically \code{test/README} and \code{test/test\_radiances.cpp}). Consider the example of a radiative transfer model for simulating satellite microwave radiances at two wavelengths, $I$ and $J$, which takes as input the surface temperature $T_s$ and the vertical profile of atmospheric temperature $T$ from a numerical weather forecast model. Such a model would be used in a data assimilation system to assimilate the temperature information from the satellite observations into the weather forecast model. In addition to returning the radiances, the model returns the gradient $\partial I/\partial T_s$ and the gradients $\partial I/\partial T_i$ for all height layers $i$ between 1 and $n$, and likewise for radiance $J$. The interface to the radiative transfer model is the following: % \begin{lstlisting} void simulate_radiances(int n, // Size of temperature array // Input variables: double surface_temperature, const double* temperature, // Output variables: double radiance[2], // Output Jacobians: double dradiance_dsurface_temperature[2], double* dradiance_dtemperature); \end{lstlisting} % The calling function needs to allocate \code{2*n} elements for the temperature Jacobian \code{dradiance\_dtemperature} to be stored, and the stored Jacobian will be oriented such that the radiance index varies fastest. \Adept\ needs to be told how to relate the radiance perturbations $\delta I$ and $\delta J$, to perturbations in the input variables, $\delta T_s$ and $\delta T_i$ (for all layers $i$). 
Mathematically, we wish the following relationship to be stored within the \Adept\ stack: % \begin{equation} \delta I = \frac{\partial I}{\partial T_s}\delta T_s+\sum_{i=1}^n\frac{\partial I}{\partial T_i}\delta T_i.\nonumber \end{equation} % This is achieved with the following wrapper function, which has \code{adouble} inputs and outputs and therefore can be called from within other parts of the algorithm that are coded in terms of \code{adouble} objects: % \begin{lstlisting} void simulate_radiances_wrapper(int n, const adouble& surface_temperature, const adouble* temperature, adouble radiance[2]) { // Create inactive (double) versions of the active (adouble) inputs double st = value(surface_temperature); std::vector t(n); for (int i = 0; i < n; ++i) t[i] = value(temperature[i]); // Declare variables to hold the inactive outputs and their Jacobians double r[2]; double dr_dst[2]; std::vector dr_dt(2*n); // Call the non-Adept function simulate_radiances(n, st, &t[0], &r[0], dr_dst, &dr_dt[0]); // Copy the results into the active variables, but use set_value in order // not to write any equivalent differential statement to the Adept stack radiance[0].set_value(r[0]); radiance[1].set_value(r[1]); // Loop over the two radiances and add the differential statements to the Adept stack for (int i = 0; i < 2; ++i) { // Add the first term on the right-hand-side of Equation 1 in the text radiance[i].add_derivative_dependence(surface_temperature, dr_dst[i]); // Now append the second term on the right-hand-side of Equation 1. The third argument // "n" of the following function says that there are n terms to be summed, and the fourth // argument "2" says to take only every second element of the Jacobian dr_dt, since the // derivatives with respect to the two radiances have been interlaced. If the fourth // argument is omitted then relevant Jacobian elements will be assumed to be contiguous // in memory. 
radiance[i].append_derivative_dependence(temperature, &dr_dt[i], n, 2); } } \end{lstlisting} % In this example, the form of \code{add\_derivative\_dependence} for one variable on the right-hand-side of the derivative expression has been used, and the form of \code{append\_derivative\_dependence} for an array of variables on the right-hand-side has been used. As described in section \ref{sec:adouble}, both functions have forms that take single variables and arrays as arguments. Note also that the use of \code{std::vector} rather than \code{new double[n]} ensures that if \code{simulate\_radiances} throws an exception, the memory allocated to hold \code{dr\_dt} will be freed correctly. \section{Member functions of the \codestyle{Stack} class} \label{sec:stack} This section describes the user-oriented member functions of the \code{Stack} class. Some functions have arguments with default values; if these arguments are omitted then the default values will be used. Some of these functions throw \Adept\ exceptions, defined in section \ref{sec:exceptions}. \begin{description} \citem{Stack(bool activate\_immediately = true)} The constructor for the \codebf{Stack} class. Normally \codebf{Stack} objects are constructed with no arguments, which means that the object will attempt to make itself the currently active stack by placing a pointer to itself into a global variable. If another \codebf{Stack} object is currently active, then the present one will be fully constructed, left in the unactivated state, and a \code{stack\_already\_active} exception will be thrown. If a \codebf{Stack} object is constructed with an argument ``\codebf{false}'', it will be started in an unactivated state, and a subsequent call to its member function \codebf{activate} will be needed to use it. % \citem{void new\_recording()} Clears all the information on the stack in order that a new recording can be started. 
Specifically this function clears all the differential statements, the list of independent and dependent variables (used in computing Jacobian matrices) and the list of gradients used by the \codebf{compute\_tangent\_linear} and \codebf{compute\_adjoint} functions. Note that this function leaves the memory allocated to reduce the overhead of reallocation in the new recordings. % \citem{bool pause\_recording()} Stops recording differential information every time an \code{adouble} statement is executed. This is useful if within a single program an algorithm needs to be run both with and without automatic differentiation. This option is only effective within compilation units compiled with \code{ADEPT\_RECORDING\_PAUSABLE} defined; if it is, the function returns \code{true}, otherwise it returns \code{false}. Further information on using this and the following function is provided in section \ref{sec:pausable}. % \citem{bool continue\_recording()} Instruct a stack that may have previously been put in a paused state to now continue recording differential information as normal. This option is only effective within compilation units compiled with \code{ADEPT\_RECORDING\_PAUSABLE} defined; if it is, the function returns \code{true}, otherwise it returns \code{false}. % \citem{bool is\_recording()} Returns \code{false} if recording has been paused with \code{pause\_recording()} and the code has been compiled with \code{ADEPT\_RECORDING\_PAUSABLE} defined. Otherwise returns \code{true}. % \citem{void compute\_tangent\_linear()} Perform a tangent-linear calculation (forward-mode differentiation) using the stored differential statements. Before calling this function you need to call the \code{adouble::set\_gradient} or \code{set\_gradients} function (see section \ref{sec:adouble}) on the independent variables to set the initial gradients, otherwise the function will throw a \code{gradients\_not\_initialized} exception. This function is synonymous with \codebf{forward()}. 
% \citem{void compute\_adjoint()} Perform an adjoint calculation (reverse-mode differentiation) using the stored differential statements. Before calling this function you need to call the \code{adouble::set\_gradient} or \code{set\_gradients} function on the dependent variables to set the initial gradients, otherwise the function will throw a \code{gradients\_not\_initialized} exception. This function is synonymous with \codebf{reverse()}. % \citem{void independent(const adouble\&\ x)} Before computing Jacobian matrices, you need to identify the independent and dependent variables, which correspond to the columns and rows of the Jacobian, respectively. This function adds \codebf{x} to the list of independent variables. If it is the $n$th variable identified in this way, the $n$th column of the Jacobian will correspond to derivatives with respect to \codebf{x}. \citem{void dependent(const adouble\&\ y)} Add \codebf{y} to the list of dependent variables. If it is the $m$th variable identified in this way, the $m$th row of the Jacobian will correspond to derivatives of \codebf{y} with respect to each of the independent variables. \citem{void independent(const adouble* x\_ptr, \Offset\ n)} Add \codebf{n} independent variables to the list, which must be stored consecutively in memory starting at the memory pointed to by \codebf{x\_ptr}. \citem{void dependent(const adouble* y\_ptr, \Offset\ n)} Add \codebf{n} dependent variables to the list, which must be stored consecutively in memory starting at the memory pointed to by \codebf{y\_ptr}. % \citem{void jacobian(double* jacobian\_out)} Compute the Jacobian matrix, i.e., the gradient of the $m$ dependent variables (identified with the \codebf{dependent(...)} function) with respect to the $n$ independent variables (identified with \codebf{independent(...)}). The result is returned in the memory pointed to by \codebf{jacobian\_out}, which must have been allocated to hold $m\times n$ values. 
The result is stored in column-major order, i.e., the $m$ dimension of the matrix varies fastest. If no dependents or independents have been identified, then the function will throw a \code{dependents\_or\_independents\_not\_identified} exception. In practice, this function calls \codebf{jacobian\_forward} if $n\le m$ and \codebf{jacobian\_reverse} if $n>m$. % \citem{void jacobian(Matrix jac)} Compute Jacobian matrix and store in a correctly sized \Adept\ \code{Matrix} object \codebf{jac}, which may be a subset of a larger matrix. See chapter \ref{chap:arrays} for a full description of \Adept\ array objects. % \citem{Matrix jacobian()} As above but the Jacobian matrix is returned from the function. % \citem{void jacobian\_forward(double* jacobian\_out)} Compute the Jacobian matrix by executing $n$ forward passes through the stored list of differential statements; this is typically faster than \codebf{jacobian\_reverse} for $n\le m$. % \citem{void jacobian\_forward(Matrix jac)} As above but store in a correctly sized \Adept\ \code{Matrix} object \codebf{jac}. % \citem{Matrix jacobian\_forward()} As above but the Jacobian matrix is returned from the function. % \citem{void jacobian\_reverse(double* jacobian\_out)} Compute the Jacobian matrix by executing $m$ reverse passes through the stored list of differential statements; this is typically faster than \codebf{jacobian\_forward} for $n>m$. % \citem{void jacobian\_reverse(Matrix jac)} As above but store in a correctly sized \Adept\ \code{Matrix} object \codebf{jac}. % \citem{Matrix jacobian\_reverse()} As above but the Jacobian matrix is returned from the function. % \citem{void clear\_gradients()} Clear the gradients set with the \code{set\_gradient} member function of the \code{adouble} class. This enables multiple adjoint and/or tangent-linear calculations to be performed with the same recording. 
% \citem{void clear\_independents()} Clear the list of independent variables, enabling a new Jacobian matrix to be computed from the same recording but for a different set of independent variables. % \citem{void clear\_dependents()} Clear the list of dependent variables, enabling a new Jacobian matrix to be computed from the same recording but for a different set of dependent variables. % \citem{\Offset\ n\_independents()} Return the number of independent variables that have been identified. % \citem{\Offset\ n\_dependents()} Return the number of dependent variables that have been identified. % \citem{\Offset\ n\_statements()} Return the number of differential statements in the recording. % \citem{\Offset\ n\_operations()} Return the total number of operations in the recording, i.e the total number of terms on the right-hand-side of all the differential statements. % \citem{\Offset\ max\_gradients()} Return the number of working gradients that need to be stored in order to perform a forward or reverse pass. % \citem{size\_t memory()} Return the number of bytes currently used to store the differential statements and the working gradients. Note that this does not include memory allocated but not currently used. % \citem{\Offset\ n\_gradients\_registered()} Each time an \code{adouble} object is created, it is allocated a unique index that is used to identify its gradient in the recorded differential statements. When the object is destructed, its index is freed for reuse. This function returns the number of gradients currently registered, equal to the number of \code{adouble} objects currently created. % \citem{void print\_status(std::ostream\&\ os = std::cout)} Print the current status of the \codebf{Stack} object, such as number of statements and operations stored and allocated, to the stream specified by \codebf{os}, or standard output if this function is called with no arguments. 
Sending the \codebf{Stack} object to the stream using the ``\code{<<}'' operator results in the same behaviour. % \citem{void print\_statements(std::ostream\&\ os = std::cout)} Print the list of differential statements to the specified stream (or standard output if not specified). Each line corresponds to a separate statement, for example ``\code{d[3] = 1.2*d[1] + 3.4*d[2]}''. % \citem{bool print\_gradients(std::ostream\&\ os = std::cout)} Print the vector of gradients to the specified stream (or standard output if not specified). This function returns \code{false} if no \code{set\_gradient} function has been called to set the first gradient and initialize the vector, and \code{true} otherwise. To diagnose what \codebf{compute\_tangent\_linear} and \codebf{compute\_adjoint} are doing, it can be useful to call \codebf{print\_gradients} immediately before and after. % \citem{void activate()} Activate the \codebf{Stack} object by copying its \code{this} pointer to a global variable that will be accessed by subsequent operations involving \code{adouble} objects. If another \codebf{Stack} is already active, a \code{stack\_already\_active} exception will be thrown. To check whether this is the case before calling \codebf{activate()}, check that the \code{active\_stack()} function (described below) returns \code{0}. % \citem{void deactivate()} Deactivate the \codebf{Stack} object by checking whether the global variable holding the pointer to the currently active \codebf{Stack} is equal to \code{this}, and if it is, setting it to \code{0}. % \citem{bool is\_active()} Returns \code{true} if the \codebf{Stack} object is the currently active one, \code{false} otherwise. % \citem{void start()} This function was present in version 0.9 to activate a \codebf{Stack} object, since in that version they were not constructed in an activated state. This function has now been deprecated and will always throw a \code{feature\_not\_available} exception. 
\citem{int max\_jacobian\_threads()} Return the maximum number of OpenMP threads available for Jacobian calculations. The number will be 1 if either the library was or the current source code is compiled without OpenMP support (i.e.\ without the \code{-fopenmp} compiler and linker flag). (Introduced in \Adept\ version 1.1.) \citem{int set\_max\_jacobian\_threads(int n)} Set the maximum number of threads to be used in Jacobian calculations to \code{n}, if possible. A value of 1 indicates that OpenMP will not be used, while a value of 0 indicates that the maximum available will be used. Returns the maximum that will be used, which may be fewer than requested, e.g. 1 if the \Adept\ library was compiled without OpenMP support. (Introduced in \Adept\ version 1.1.) \citem{void preallocate\_statements(int n)} If you know in advance roughly how many differential statements will be stored by an algorithm then you may be able to speed-up the first use of the stack by preallocating the memory needed to store them. More memory will still be allocated if needed, but this should reduce the number of allocations and copies. \citem{void preallocate\_operations(int n)} Likewise, if you know in advance roughly how many operations will be stored then you can speed-up the first use of the stack with this member function. \end{description} \noindent The following non-member functions are provided in the \code{adept} namespace: \begin{description} \citem{adept::Stack* active\_stack()} Returns a pointer to the currently active \codebf{Stack} object, or \code{0} if there is none. \citem{bool is\_thread\_unsafe()} Returns \code{true} if your code has been compiled with \code{ADEPT\_STACK\_THREAD\_UNSAFE}, \code{false} otherwise. % \end{description} \section{Member functions of the \codestyle{adouble} object} \label{sec:adouble} This section describes the user-oriented member functions of the \code{adouble} class. 
Some functions have arguments with default values; if these arguments are omitted then the default values will be used. Some of these functions throw \Adept\ exceptions, defined in section \ref{sec:exceptions}. \begin{description} \citem{double value()} Return the underlying \code{double} value. % \citem{void set\_value(double x)} Set the value of the \codebf{adouble} object to \codebf{x}, without storing the equivalent differential statement in the currently active stack. % \citem{void set\_gradient(const double\&\ gradient)} Set the gradient corresponding to this \codebf{adouble} variable. The first call of this function (for any \codebf{adouble} variable) after a new recording is made also initializes the vector of working gradients. This function should be called for one or more \codebf{adouble} objects after a recording has been made but before a call to \code{Stack::compute\_tangent\_linear()} or \code{Stack::compute\_adjoint()}. % \citem{void get\_gradient(double\&\ gradient)} Set \codebf{gradient} to the value of the gradient corresponding to this \codebf{adouble} object. This function is used to extract the result after a call to \code{Stack::compute\_tangent\_linear()} or \code{Stack::compute\_adjoint()}. If the \codebf{set\_gradient} function was not called since the last recording was made, this function will throw a \code{gradients\_not\_initialized} exception. The function can also throw a \code{gradient\_out\_of\_range} exception if new \codebf{adouble} objects were created since the first \codebf{set\_gradient} function was called. % \citem{void add\_derivative\_dependence(const adouble\&\ r, const double\&\ g)} Add a differential statement to the currently active stack of the form $\delta \codebf{l}=\codebf{g}\times\delta \codebf{r}$, where \codebf{l} is the \codebf{adouble} object from which this function is called. 
This function is needed to interface to software containing hand-coded Jacobians, as described in section \ref{sec:interfacehandcoded}; in this case \codebf{g} is the gradient $\partial\codebf{l}/\partial\codebf{r}$ obtained from such software. % \citem{void append\_derivative\_dependence(const adouble\&\ r, const double\&\ g)} Assuming that the same \codebf{adouble} object has just had its \codebf{add\_derivative\_dependence} member function called, this function appends ${}+\codebf{g}\times\delta\codebf{r}$ to the most recent differential statement on the stack. If the calling \codebf{adouble} object is different, then a \code{wrong\_gradient} exception will be thrown. Note that multiple \codebf{append\_derivative\_dependence} calls can be made in succession. % \item[\begin{minipage}{\textwidth}\codesize\texttt{void add\_derivative\_dependence(const adouble* r, const double* g,}\\ \mbox{ }\texttt{\hspace{18em}\Offset\ n = 1, \Offset\ g\_stride = 1)}\end{minipage}] % Add a differential statement to the currently active stack of the form $\delta\codebf{l}=\sum_{i=0}^{\codebf{n}-1}\codebf{g[}i\codebf{]} \times\delta\codebf{r[}i\codebf{]}$, where \codebf{l} is the \codebf{adouble} object from which this function is called. If the \codebf{g\_stride} argument is provided, then the index to the \codebf{g} array will be $i\times\codebf{g\_stride}$ rather than $i$. This is useful if the Jacobian provided is oriented such that the relevant gradients for \codebf{l} are not spaced consecutively. 
% \item[\begin{minipage}{\textwidth}\codesize\texttt{void append\_derivative\_dependence(const adouble* rhs, const double* g,}\\ \mbox{ }\texttt{\hspace{20em}\Offset\ n = 1, \Offset\ g\_stride = 1)}\end{minipage}] % Assuming that the same \codebf{adouble} object has just called the \codebf{add\_derivative\_dependence} function, this function appends ${}+\sum_{i=0}^{\codebf{n}-1}\codebf{g[}i\codebf{]} \times\delta\codebf{rhs[}i\codebf{]}$ to the most recent differential statement on the stack. If the calling \codebf{adouble} object is different, then a \code{wrong\_gradient} exception will be thrown. The \codebf{g\_stride} argument behaves the same way as in the previous function described. \end{description} \noindent The following non-member functions are provided in the \code{adept} namespace: \begin{description} \citem{double value(const adouble\& x)} Returns the underlying value of \codebf{x} as a \codebf{double}. This is useful to enable \codebf{x} to be used in \code{fprintf} function calls. It is generally better to use \codebf{adept::value(x)} rather than \codebf{x.value()}, because the former also works if you compile the code with the \code{ADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} flag set, as discussed in section \ref{sec:multipleobjects}. % \citem{void set\_values(adouble* x, \Offset\ n, const double* x\_val)} Set the value of the \codebf{n} \codebf{adouble} objects starting at \codebf{x} to the values in \codebf{x\_val}, without storing the equivalent differential statement in the currently active stack. % \citem{void set\_gradients(adouble* x, size\_t n, const double* gradients)} Set the gradients corresponding to the \codebf{n} \codebf{adouble} objects starting at \codebf{x} to the \codebf{n} \code{double}s starting at \codebf{gradients}. This has the same effect as calling the \codebf{set\_gradient} member function of each \codebf{adouble} object in turn, but is more concise. 
% \citem{void get\_gradients(const adouble* y, size\_t n, double* gradients)} Copy the gradient of the \codebf{n} \codebf{adouble} objects starting at \codebf{y} into the \codebf{n} \code{double}s starting at \codebf{gradients}. This has the same effect as calling the \codebf{get\_gradient} member function of each \codebf{adouble} object in turn, but is more concise. This function can throw a \code{gradient\_out\_of\_range} exception if new \codebf{adouble} objects were created since the first \codebf{set\_gradients} function or \codebf{set\_gradient} member function was called. \end{description} \chapter{Using \Adept's array functionality} \label{chap:arrays} \section{Introduction} \label{sec:array_functionality} The design of \Adept's array capability and many of the functions is inspired to a significant extent by the built-in array support in Fortran 90 (and later), and a lesser extent by Matlab, although implemented in the ``C++ way'', e.g.\ default row-major order with all array indices starting from zero. Future additions to the array capability in \Adept\ will attempt to reproduce built-in Fortran array functions if available\footnote{This decision may puzzle some readers, since Fortran is a dirty word to many C++ users due to the limitations of the FORTRAN 77 language. Many of these limitations were overcome in Fortran 90, whose array functionality in particular is rather well designed. Indeed, the pioneering ``Blitz++'' C++ array library \cite[]{Veldhuizen1995} also reproduces many Fortran array functions. All references to Fortran in this document imply the 1990 (or later) standard.}. This design makes \Adept\ a good choice if you have Fortran code that you wish to convert to C++. \Adept\ provides the following array functionality: % \begin{description} \item[Multi-dimensional arrays.] 
Standard dynamically sized arrays can have an arbitrary number of dimensions (although indexing and slicing is supported only up to 7), and may refer to non-contiguous areas of memory. See section \ref{sec:array}. \item[Mathematical operators and functions.] \Adept\ supports array expressions containing the standard mathematical operators \code{+}, \code{-}, \code{*} and \code{/}, as well as their assignment versions \code{+=}, \code{-=}, \code{*=} and \code{/=}. When applied to arrays, they work ``element-wise'', applying the same operation to every element of the arrays. \Adept\ also supports array operations on all the mathematical functions listed in section \ref{sec:ad_functionality}. The following operators and functions return boolean array expressions: \code{==}, \code{!=}, \code{>}, \code{<}, \code{>=} and \code{<=}, \code{isfinite}, \code{isinf} and \code{isnan}. See section \ref{sec:operators}. \item[Array slicing.] There are many ways to produce an array that references a subset of another array, and therefore can be used as an lvalue in a statement. Arrays can be indexed with scalar integers, a contiguous range of integers, a strided range of integers or an arbitrary list of integers. This is facilitated with ``\code{\_\_}'' (a double underscore) and ``\code{end}'', such that \code{A(\_\_,end-1)} returns a vector pointing to the penultimate column of matrix \code{A}. The member function \code{subset} produces an array pointing to a contiguous subset of the original array, while \code{diag\_vector} and \code{diag\_matrix} produce arrays pointing to the diagonal of the original array. \code{T} produces an array pointing to the transpose of the original array. See section \ref{sec:slice}. \item[Passing arrays to and from functions.] \Adept\ uses a reference-counting approach to implement the storage of array data, enabling multiple array objects to point to the same data, or parts of it in the case of array slices. 
This makes it straightforward to pass arrays to and from functions without having to perform a deep copy. See section \ref{sec:passing}. \item[Array reduction operations.] The functions \code{sum}, \code{mean}, \code{product}, \code{minval}, \code{maxval} and \code{norm2} perform reduction operations that return an array of lower rank than the expression they are applied to. The functions \code{all} and \code{any} do the same but for boolean expressions. \code{count} returns the number of \code{true} elements in a boolean expression. % The function % \code{find(A)} returns indices to the \code{true} elements of % \code{A}. See section \ref{sec:reduce}. \item[Array expansion operations.] The functions \code{outer\_product} and \code{spread} return an expression of a higher rank than the expression they are applied to. See section \ref{sec:expand}. \item[Conditional operations.] Two convenient ways are provided to perform an operation on an array depending on the result of a boolean expression: \code{where} and \code{find}. The statement \code{A.where(B>0)=C} assigns elements of \code{C} to elements of \code{A} whenever the corresponding element of \code{B} is greater than zero. For vectors only, the same result could be obtained with \code{A(find(B>0))=C(find(B>0))}. See section \ref{sec:conditional}. \item[Fixed-size arrays.] \Adept\ provides a fixed-size array class with dimensions (up to seven) that are known at compile time. The functionality is very similar to standard dynamic arrays. \item[Special square matrices.] \Adept\ uses specific classes for symmetric, triangular and band-diagonal matrices, the latter of which use compressed storage and include diagonal and tridiagonal matrices. Certain operations such as matrix multiplication and solving linear equations are optimized especially for these objects. See section \ref{sec:square}. \item[Matrix multiplication.] 
Matrix multiplication can be applied to one- and two-dimensional arrays using the \code{matmul} function, or for extra syntactic sugar, the ``\code{**}'' pseudo-operator. \Adept\ uses whatever BLAS (Basic Linear Algebra Subroutines) support is available on your system, including optimized versions for symmetric and band-diagonal matrices. See section \ref{sec:matmul}. \item[Linear algebra.] \Adept\ uses the LAPACK library to invert matrices and solve linear systems of equations. See section \ref{sec:la}. \item[Array bounds and alias checking.] \Adept\ checks at compile time that terms in an array expression accord in rank, and at run time that they accord in the size of each dimension. Run-time alias checking is performed to determine if any objects on the right-hand-side of a statement overlap in memory with the left-hand-side of the statement, making a temporary copy of the right-hand-side if they do. This can be overridden with the \code{noalias} function. See section \ref{sec:bounds}. \item[Interoperability with Fortran arrays.] The Fortran 2018 standard enables Fortran's assumed-shape arrays to be passed to and from C/C++. Section \ref{sec:fortran} describes how they can be treated as \Adept\ arrays within C++. \end{description}% % \section{The \codestyle{Array} class} \label{sec:array} The bread and butter of array operations is provided by the \code{Array} class template (in the \code{adept} namespace along with all other public types and classes), which has the following declaration: \begin{lstlisting} namespace adept { template <int Rank, typename Type = Real, bool IsActive = false> class Array; } \end{lstlisting} The first template argument provides the number of dimensions of the array and may be 1 or greater, although indexing and slicing is only supported up to 7 dimensions. The second argument is the numerical type being stored and can be any simple integer or real number, including \code{bool}. 
The default type is \code{adept::Real}, which is the default floating-point type the \Adept\ library has been compiled to use for computing derivatives, and is usually \code{double}. The final argument states whether the array is ``active'', i.e.\ whether it participates in the differentiation of an algorithm. A number of typedefs are provided for the most common types of array: \code{Vector}, \code{Matrix}, \code{Array3D} and so on up to \code{Array7D} provide inactive arrays of type \code{Real} and rank 1--7. The corresponding active types are \code{aVector}, \code{aMatrix}, \code{aArray3D} etc. Arrays of other numeric types have the pattern \code{boolVector}, \code{intVector}, \code{floatVector}, \code{afloatVector}, and similarly for matrices and higher dimensional arrays. If you wanted shortcuts for other types you could do the following: \begin{lstlisting} typedef adept::Array<4,unsigned int> uintArray4D; typedef adept::Array<2,long double,true> alongdoubleMatrix; // Active \end{lstlisting} An \code{Array} with uninitialized elements can be constructed in numerous ways: \begin{lstlisting} using namespace adept; Vector v; // Initialize an empty vector Array3D A(3,4,5); // Initialize a 3x4x5 array (up to 7 arguments possible) Matrix M(dimensions(3,4)); // The "dimensions" function takes up to 7 arguments Matrix N(M.dimensions()); // Make N the same size as M \end{lstlisting} In the remaining code examples it will be assumed that \code{using namespace adept} has already been called. When new memory is needed, the \code{Array} object creates a \code{Storage} object that contains the memory needed, and stores pointers to both the \code{Storage} object and the start of the data. By default the data are accessed in C-style row-major order (i.e.\ the final index corresponds to the array dimension that varies most rapidly in memory). 
However, this is flexible since in addition to storing the length of each of its $n$ dimensions, a rank-$n$ \code{Array} also stores $n$ ``offsets'' that define the separation of elements in memory in each dimension. Thus, a 3-by-4 matrix with row-major storage would store offsets of (4,1). The same size matrix would use column-major storage simply by storing offsets of (1,3). To make new arrays use column-major storage, call the following function: \begin{lstlisting} set_array_row_major_order(false); \end{lstlisting} Note that this does not change the storage of any existing objects. Note also that when array expressions are evaluated, the data are requested in row-major order, so the use of column-major arrays will incur a performance penalty. An \code{Array} may also be constructed such that it immediately contains data: \begin{lstlisting} Vector v = M(__,0); // Link to a existing array, in this case the first column of M Vector v(M(__,0)); // Has exactly the same effect as the previous example Matrix N = log(M); // Initialize with the size and values of a mathematical expression \end{lstlisting} It can be seen from the constructors involving \code{Vector}s that an \code{Array} can be configured to ``link'' to part of an existing \code{Array}, and modifications to the numbers in one will be seen by the other. This is a very useful feature as it allows slices of an array to be passed to functions and modified; see section \ref{sec:slice}. Note that the array or sub-array being linked to must be of the same rank, type and activeness as the linking array. Internally, linking is achieved by both the arrays pointing to the same \code{Storage} object, which itself contains a reference count of the number of arrays pointing to it. When an \code{Array} is destructed the reference count is reduced by one and only if it falls to zero will the data get deallocated. 
This ensures that if the \code{Array} being linked to goes out of scope, the linking \code{Array} will ``steal'' the data. You can also make an \code{Array} point to data not held in a \code{Storage} object, for example in a function whose interface is only in terms of intrinsic C types: \begin{lstlisting} double my_norm2(int n, double* ptr) { Vector x(ptr, dimensions(n)); // Create a Vector pointing to existing data return norm2(x); // Use Adept's L2-norm function } \end{lstlisting} The \code{Vector} in this example can be used in the same way as any other array, but relies on the existing data not being deallocated for the lifetime of the \code{Vector}. After it has been constructed, an \code{Array} can be resized, relinked or cleared completely as follows: \begin{lstlisting} M.resize(5,2); // Works up to 7 dimensions M.resize(dimensions(5,2)); // As above N.resize(M.dimensions()); // Resize N to be the same size as M v.link(M(end-1,__)); // Size of v set to that of the argument and link to data v >>= M(end-1,__); // Convenient syntax for linking, similar to Fortran's "->" M.clear(); // Returns array to original empty state \end{lstlisting} The member functions \code{resize} and \code{clear} unlink from any existing data, which involves deallocation if no other array is pointing to the same data. If the \code{link} function, or the alternative ``\code{>>=}'' syntax, is applied with a non-empty array on the left-hand-side then the existing data will be quietly cleared before linking to the new data. Note that if you assign one array to another (e.g.\ \code{N=M}), then they must be of the same size; if they are not then you should clear the left-hand-side first. By default, resized arrays are row-major, unless \code{set\_array\_row\_major\_order(false)} has been called. To explicitly specify the ordering, you may use the \code{resize\_row\_major} or \code{resize\_column\_major} member functions in place of \code{resize}. 
The \code{Array} class implements a number of member functions for inquiring about its properties: \begin{description} \citem{size()} Returns the total number of elements, i.e.\ the product of the lengths of each of the dimensions. \citem{dimension(i)} Returns the length of dimension \code{i}. \citem{offset(i)} Returns the separation in memory of elements along dimension \code{i}. \citem{gradient\_index()} For active arrays, returns the gradient index of the first element of the array, which is always positive; for inactive arrays it returns a negative number. \citem{empty()} Returns \code{true} if the array is in the empty state, or \code{false} otherwise. \citem{dimensions()} Returns an object listing the extent of each dimension in the \code{Array}, useful for resizing other arrays. The object is actually of type \code{ExpressionSize<Rank>} (where \code{Rank} is the rank of the array), a thin wrapper for a simple \code{int[Rank]} C-array, although it is rare to need to use it explicitly. \citem{offset()} Returns an object (also of type \code{ExpressionSize<Rank>}) describing how array indices are translated into memory offsets. \end{description} An \code{Array} may be filled using the \code{<<} operator for the first element followed by either the \code{<<} or \code{,} operators for subsequent elements: \begin{lstlisting} Vector v(4); v << 1 << 2 << 3 << 4; // Fill the four elements of v v << 1, 2, 3, 4; // Same behaviour but easier on the eye v << 1, 2, 3, 4, 5; // Error: v has been overfilled Matrix M(2,4); M << 1, 2, 3, 4, // Filling of multi-dimensional arrays 5, 6, 7, 8; // automatically moves on to next dimension M << 1, 2, 3, 4, v; // v treated as a row vector here \end{lstlisting} For multidimensional arrays, elements are filled such that the final dimension ticks over fastest (regardless of whether the array uses row-major storage internally), and new rows are started when a row is complete. 
Moreover, other arrays can be part of the list of elements, provided that they fit in. In this context, a rank-1 array is treated as a row vector. An \code{index\_out\_of\_bounds} exception is thrown if an array is overfilled, while an \code{empty\_array} exception is thrown if an attempt is made to fill an empty array. \cxx11 \begin{leftbar} If you compile your code with C++11 features enabled then you can use the ``initializer list'' feature to fill arrays using the C-like curly bracket syntax: \begin{lstlisting} Vector v; // Construct an empty vector v = {1, 2, 3}; // Resize to length 3 and fill Vector w = {1, 2, 3}; // Construct a vector of length 3 and fill w = {4.4, 5.5}; // Underfill leads to remaining elements set to zero (as in C) w = {6, 7, 8, 9}; // Overfill leads to size_mismatch exception being thrown Matrix M = {{1, 2, 3}, // Multi-dimensional arrays use nested curly brackets; {4, 5}}; // ...underfill again leads to remaining elements set to zero \end{lstlisting} Another convenient property of this syntax is that temporary arrays with explicit values can be used in expressions: \begin{lstlisting} v = w * Vector{3.0, 4.2, 5.1}; \end{lstlisting} \end{leftbar} When interfacing with other libraries, direct access to the data is often required. The \code{Array} class provides the following member functions: \begin{description} \citem{data()} Returns a pointer to the first element in the array, i.e.\ the element found by indexing all the dimensions of the array with zero. It is up to the caller to understand the layout of the data in memory and not to stray outside. Remember that an array may be strided and the stride may even be negative so that the data returned from increasing indices are actually from earlier memory addresses. Note that a double-precision active array is not stored as an array of \code{adouble} objects, but as an array of \code{double} data and a single gradient index for the first element. 
Thus the pointer returned by \code{data()} will point to the underlying inactive data. In contexts where the \code{Array} object is \code{const}, a \code{const} pointer will be returned. Note that in a multi-dimensional array, successive array dimensions are not guaranteed to be contiguous in memory since it is sometimes advantageous for vectorization for \Adept\ to pad the rows to an alignment boundary. You can use the output of the \code{offset()} member function to determine the spacing of the elements in each dimension. % \citem{const\_data()} It is sometimes convenient to specify explicitly that read-only access is required, in which case you can use \code{const\_data()} to return a \code{const} pointer to the first element in the array. \end{description} \section{Operators and mathematical functions} \label{sec:operators} The operators and mathematical functions listed in section \ref{sec:ad_functionality} have been overloaded so that they work exactly as you would expect. Consider this example: \begin{lstlisting} floatVector a(5); // Inactive single-precision vector aVector b(5), c(5); // Active vectors aReal d; // An active scalar // ... other code manipulating a-d ... b = 2.0; // Set all elements of b to a scalar value c += 5.0*a + sin(b)/d; // Add the right-hand-side to c \end{lstlisting} The penultimate line illustrates that all elements of an \code{Array} can be set to the same value, although note that this will only work if the array is not in the empty state. The final line illustrates how terms with different rank, type and activeness can participate in the same expression. Scalars and arrays can participate in the same expression on the right-hand-side of a statement provided that the arrays have the same size as the array on the left-hand-side. Objects of different type (in this case single and double precision) can be combined in a mathematical operation, and the type of that operation will be the larger (higher precision) of the two types. 
If active and inactive objects participate in an expression then the left-hand-side must also be active. Expression templates ensure that no temporary arrays need to be created to store the output of intermediate parts of the expression. The functions \code{max} and \code{min} behave just like binary operators (such as \code{+} and \code{*}) in this regard, as shown by the following: \begin{lstlisting} c = max(a,b); // Element-wise comparison of a and b c = min(a,3.0); // Return minimum of each element of a and 3 \end{lstlisting} The examples so far have floating-point results, but some operators (e.g.\ ``\code{==}'') and some functions (e.g.\ \code{isinf}) take floating-point arguments and return a boolean. The \Adept\ versions take floating-point array expressions as arguments and return \code{bool} expressions of the same rank and size. Finally, the \Adept\ versions of the operators \code{!}, \code{||} and \code{\&\&} take a \code{bool} expression as arguments and return a \code{bool} expression of the same size and rank. \section{Array slicing} \label{sec:slice} This section concerns the many ways that sub-parts of an \code{Array} can be extracted to produce an object that can be used as an lvalue; that is, if the object is modified then it will modify part of the original \code{Array}. It should be stressed that none of these methods results in any rearrangement of data in memory, so they should be efficient. The first way this can be done is via the function-call and member-access operators (i.e.\ \code{operator()} and \code{operator[]}, respectively) of the \code{Array}. In the case of the function-call operator, the same number of arguments as the rank of the array must be provided, where each argument states how its corresponding dimension should be treated. The nature of the resulting object depends on the type of all of the arguments in a way that is similar to how Fortran arrays behave, although note that array indices always start at 0. 
The four different behaviours are as follows: \begin{description} \item[Extract single value.] If every argument is an integer scalar or scalar expression, then a reference to a single element of the array will be extracted. If an argument is an integer expression containing \code{end}, then \code{end} will be interpreted to be the index to the final element of that dimension (a feature borrowed from Matlab). If the array is active then the returned object will be of a special ``active reference'' type that can be used as an lvalue and ensures that any expressions making use of this element can be differentiated. Now for some examples: \begin{lstlisting} aMatrix A(4,3); aReal x = A(1,1); // Copy element at second row and second column into x A(end-1,1) *= 2.0; // Double the element in the penultimate row and 2nd column of A A(3) = 4.0; // Error: number of indices does not match number of dimensions \end{lstlisting} \item[Extract regular subarray.] If every argument is either (i) an integer scalar or scalar expression, or (ii) a regular range of indices, and there is at least one of (ii), then an \code{Array} object will be returned of the same type and activeness as the original. However, for each argument of type (i), the rank of the returned array will be one less than that of the original. There are three ways to express a regular range of indices: ``\code{\_\_}'' represents all indices of a particular dimension, \code{range(a,b)} represents a contiguous range of indices between \code{a} and \code{b} (equivalent to \code{a:b} in Fortran and Matlab), and \code{stride(a,b,c)} represents a regular range of indices between \code{a} and \code{b} with spacing \code{c} (equivalent to \code{a:b:c} in Fortran and \code{a:c:b} in Matlab). Note that \code{a}, \code{b} and \code{c} may be scalar expressions containing \code{end}, but \code{c} must not be zero although it can be negative to indicate a reversed ordering.
The rank of the returned array is known at compile time; thus if range arguments are found at run-time to contain only one element (e.g.\ \code{range(1,1)}) then the dimension being referred to will not be removed in the returned array but will remain as a singleton dimension. This behaviour is the same as indexing an array dimension with \code{1:1} in Fortran. Now for some examples: \begin{lstlisting} v(range(1,end-1)) // Subset of vector v that excludes 1st & last points A(0,stride(end,0,-1)) // First row of A as a vector treated in reverse order A(range(0,0),stride(0,0,1)) // A 1-by-1 matrix containing the first element of A \end{lstlisting} \item[Extract irregular subarray.] If an array is indexed as in either of the two methods above, except that one or more dimensions is instead indexed using a rank-1 \code{Array} of integers, then the result is a special ``indexed-array'' type that stores how each dimension is indexed. If it then participates either on the left- or right-hand-side of a mathematical expression then when an element is requested, the indices will be queried to map the request to obtain the correct element from the original array. This is much less efficient than using regular ranges of indices as above. It also means that if an indexed array is passed to a function expecting an object of type \code{Array}, then it will first be converted to an \code{Array} and any modifications performed within the function will not be passed back to the original array. For example: \begin{lstlisting} intVector index(3); index << 2, 3, 5; Matrix A(4,6); A(0,index) = 2.0; // Set irregularly spaced elements of the first row of A \end{lstlisting} \item[Slice leading dimension.] In C, an element is extracted from a two-dimensional array using \code{A[i][j]}, and \code{A[i]} returns a pointer to a single row of \code{A}, where \code{i} and \code{j} are integers.
To enable similar functionality, if \code{A} is an \Adept\ matrix then \code{A[i]} indexes the leading dimension by integer \code{i} returning an array of rank one less than the original. This is equivalent to \code{A(i,\_\_)}. Furthermore, \code{A[i][j]} will return an individual element as in C, but it should be stressed that \code{A(i,j)} is more efficient since it does not involve the creation of intermediate arrays. \end{description} % There are a few other ways to produce lvalues that consist of a subset or a reordering of an array. They are implemented as member functions of the \code{Array} class, in order to distinguish from non-member functions that produce a copy of the data and therefore cannot be usefully used as lvalues. For example, \code{A.T()} and \code{transpose(A)} both return the transpose of matrix \code{A}, but the former is faster since it does not make a copy of the original data, while the latter is more flexible since it can be applied to array expressions (e.g.\ \code{transpose(A*B)}). The member functions available are: \begin{description} \citem{subset(int ibegin0, int iend0, ...)} This function returns a contiguous subset of an array as an array of the same rank that points to the original data. It takes twice as many arguments as the array has dimensions, with each pair of arguments representing the indices to the first and last element to include from a particular dimension. Exactly the same result can be obtained using \code{range} but the \code{subset} form is more concise. For example, for a matrix \code{M}, \code{M.subset(1,5,3,10)} is equivalent to \code{M(range(1,5),range(3,10))}. % \citem{T()} This function returns the transpose of a rank-2 array (a matrix). The returned array points to the same data but with its dimensions reversed. A compile-time error occurs if this function is used on an array with rank other than 2. 
Currently \Adept\ doesn't allow the transpose of a rank-1 array (a vector), since vectors are not intended to have an intrinsic orientation. When orientation matters, such as in matrix multiplication, the intended orientation may be inferred from the context or specified explicitly. % \citem{permute(int i0, int i1, ...)} This function is the generalization of the transpose for multi-dimensional arrays: it returns an array of the same rank as the original but with the dimensions rearranged according to the arguments. There must be the same number of arguments as there are dimensions, and each dimension (starting at 0) must be provided once only. The returned array is linked to the original; the permutation is achieved simply by rearranging the list of dimensions and the list of ``offsets'' (the separation in memory of elements along each dimension individually). % \citem{diag\_matrix()} When this function is applied to a rank-1 \code{Array} of length $n$, it returns an $n$-by-$n$ diagonal matrix (specifically a \code{DiagMatrix}; see section \ref{sec:square}) that points to the data from the rank-1 array along its diagonal. % \citem{diag\_vector()} When this function is applied to a rank-2 \code{Array} with equally sized dimensions, it returns a rank-1 array pointing to the data along its diagonal. An \code{invalid\_operation} exception is thrown if applied to a non-square matrix, and a compile-time error if applied to an array of rank other than 2. % \citem{diag\_vector(int i)} When applied to a square rank-2 $n$-by-$n$ \code{Array}, this returns a rank-1 array of length $n-\mathrm{abs}(i)$ pointing to the $i$th superdiagonal of the square matrix, or the $-i$th subdiagonal if $i$ is negative. An \code{invalid\_operation} exception is thrown if applied to a non-square matrix, and a compile-time error if applied to an array of rank other than 2.
% \citem{submatrix\_on\_diagonal(int ibegin,int iend)} When applied to a square rank-2 array, this function returns a square matrix that shares part of the diagonal of the original matrix. Thus \code{A.submatrix\_on\_diagonal(int ibegin,int iend)} is equivalent to \code{A(range(ibegin,iend),range(ibegin,iend))}. Its purpose is to provide a subsetting facility for symmetric, triangular and band-diagonal matrices (see section \ref{sec:square}) for which general array indexing is not available. If applied to a non-square matrix, an \code{invalid\_operation} exception will be thrown. %\citem{upper\_matrix()} %\citem{lower\_matrix()} %\citem{band\_matrix()} \citem{reshape(int i0, int i1...)} Only applicable to an \code{Array} of rank 1, this returns a multi-dimensional array whose dimensions are given by the arguments to the function. Between 2 and 7 dimensions are possible. If the arguments are such that the total size of the returned array would not match the length of the vector, an \code{invalid\_dimension} exception is thrown. \end{description} \section{Passing arrays to and from functions} \label{sec:passing} When writing functions taking array arguments, there are three different ways to do it depending on the extent to which the function needs to be able to modify the array. In the case of constant array arguments, a constant reference should be used; for example: \begin{lstlisting} Real l3norm(const Vector& v) { // Function returning the L3-norm of a vector return cbrt(sum(v*v*v)); } Vector w(3); w << 1.0, 2.0, 3.0; // Create a test vector Real ans1 = l3norm(w); // Named vector argument Real ans2 = l3norm(w(range(0,1))); // Temporary vector argument Real ans3 = l3norm(2.0*w); // Expression implicitly converted to temporary vector \end{lstlisting} This function works with all three types of argument. 
The last example illustrates that when an inactive rank-1 expression is passed to the function, it is evaluated and the result placed in a temporary vector that is passed to the function. At the other extreme, we may wish to create a function that modifies an array argument, including the possibility of changing its size; for example: \begin{lstlisting} void resize_and_zero(int n, Vector& v) { // A rather pointless function... v.resize(n); v = 0.0; } Vector w(4); resize_and_zero(2,w); // Results in w={0.0, 0.0} resize_and_zero(2,w(range(0,2))); // Compile error: argument is temporary resize_and_zero(2,2.0*w); // Compile error: argument is not an lvalue \end{lstlisting} In this case, due to the C++ rule that a non-constant reference cannot bind to a temporary object, the function can only take a \emph{non-temporary} \code{Vector} as an argument. This is fair enough; it would not make sense to resize the subset of an array, or an expression. However, it is very common to want to pass a subset of an array to a function and for the function to modify the values of the array, but not to resize it. In \Adept\ this is achieved as follows: \begin{lstlisting} void square_in_place(Vector v) { v *= v; } Vector w(3); w << 2.0, 3.0, 5.0; square_in_place(w); // Results in w={4.0, 9.0, 25.0} square_in_place(w(range(0,1))); // Results in w={4.0, 9.0, 5.0} square_in_place(2.0*w); // No effect on w \end{lstlisting} Even though the \code{Vector} has been passed by value, the \code{Vector} copy constructor performs a ``shallow copy'', which means that little more than the array dimensions and a pointer to the data are copied. Therefore, in the first two examples above the vector \code{v} inside the function points to data in \code{w}, and can therefore modify \code{w}. 
By contrast, when an expression is passed to the function, a new \code{Vector} is created to hold the result of the expression, and when this is modified inside the function it does not affect the data in the calling routine. The fact that \code{Array} copy constructors perform shallow copies also improves the efficiency of functions that return arrays such as the following: \begin{lstlisting} Matrix square(const Matrix& in) { Matrix out = in*in; // Create a matrix containing the result of in*in return out; } Matrix A(100,100); // Allocate memory for "A" Matrix B = square(A); // Copy constructor: shallow copy of "out" into "B" \end{lstlisting} At the \code{return} statement, matrix \code{out} is received by the copy constructor of matrix \code{B}, so a shallow copy is performed. This means that the description of matrix \code{out} is copied to \code{B}, including a pointer to the \code{Storage} object containing both the data and a count of the number of references to it; this counter is increased by one. Matrix \code{out} is then destructed, and the counter is immediately reduced by one. The net result is that \code{B} has ``stolen'' the data in the matrix from \code{out} without it having been copied, thus avoiding unnecessary allocation of memory on the heap followed by copying and deallocation. The shallow-copy implementation leads to behaviour that users may not be expecting. If an array is initialized from another array in either of the following two ways: \begin{lstlisting} Matrix M(3,4); Matrix A(M); // Call copy constructor Matrix B = M; // Call copy constructor \end{lstlisting} then the result is that \code{A}, \code{B} and \code{M} share the same data, rather than a copy being made.
To make a deep copy, it is necessary to do the following: \begin{lstlisting} Matrix M(3,4); Matrix A; // Create empty matrix A = M; // Call assignment operator for deep copy \end{lstlisting} This is annoying, but the alternative is that there would be no clean way to pass a subset of an array to a function that then modifies its values. The same behaviour is implemented in the Blitz++ array class \cite[]{Veldhuizen1995}. It should be noted that with the introduction of ``move semantics'' in the C++11 standard, it is possible to detect when an array returned from a function is about to be destructed, and therefore invoke a move constructor that implements a shallow copy. This negates one of the two reasons for making the copy constructor execute only a shallow copy. But it does not help in passing array subsets to functions, unless two versions of every function were created, one accepting an lvalue reference (\code{Array\&}) and the other accepting an rvalue reference (\code{Array\&\&}), which is hardly practical. \cxx11 \begin{leftbar}If you compile your code with C++11 features enabled then move semantics can sometimes make assignment more efficient. Consider code calling the \code{square} function above: \begin{lstlisting} Matrix A(10,10), B(10,10); B = square(A); // Move assignment operator performs shallow copy Matrix C(B); // B and C now share the same data B = square(A); // Move assignment operator performs deep copy \end{lstlisting} Both assignments are to temporary objects about to be destructed, so the move assignment operator is called. This operator checks how many references there are to the data in \code{B}. In the first case there is only one reference, so the data in \code{B} can safely be discarded and a shallow copy (a ``move'') of the data in the temporary is performed. In the second case there are two references, so a deep copy must be performed in order that \code{C} sees the change in \code{B}.
\end{leftbar} \section{Array reduction operations} \label{sec:reduce} A family of functions return a result that is reduced in rank compared to their argument, and operate in the same way as Fortran functions of the same name. Consider the \code{sum} function, which can be used either to sum all the elements in an array expression and return a scalar, or to sum elements along the dimension specified in the second argument and return an array whose rank is one less than the first argument: \begin{lstlisting} Matrix A(3,4); Real x = sum(A); // Sum all elements of matrix A Vector v = sum(A,1); // Sum along the row dimension returning a vector of length 3 \end{lstlisting} Functions that are used in the same way are \code{mean}, \code{product}, \code{minval}, \code{maxval} and \code{norm2} (the square-root of the sum of the squares of each element). Note the difference between \code{maxval} and \code{max}: the behaviour of \code{max} is outlined in section \ref{sec:operators}. Three further functions operate in the same way but on boolean arrays: \code{all} returns \code{true} only if all elements are \code{true}, \code{any} returns \code{true} if any element is \code{true} (and \code{false} otherwise), while \code{count} returns the number of \code{true} elements. Each of these can work on an individual dimension as with \code{sum} and friends. A further function, \code{dot\_product(a,b)}, takes two arguments that must be rank-1 arrays of the same length and returns the dot product. This is essentially the same as \code{sum(a*b)}. \section{Array expansion operations} \label{sec:expand} The function \code{outer\_product(x,y)} returns the outer product of two rank-1 expressions; if ${\bf x}$ and ${\bf y}$ are interpreted as column vectors then ${\bf xy}^T$ is returned. If \code{outer\_product} is used in an expression then an intermediate matrix object is not created to store it.
The function \code{spread<dim>(A,n)} returns an array that replicates the \code{A} array \code{n} times along dimension \code{dim}. The returned array has a rank one larger than \code{A} whose dimension \code{dim} is \code{n} and the remaining dimensions are the same as those of \code{A}. It is essentially the same as the Fortran function of the same name, but \code{dim} is provided as a template argument since performance is improved if this is known at compile time. The following illustrates \code{spread} for an argument of rank 1: \begin{lstlisting} Vector v(3); v << 1, 2, 3; Matrix M0 = spread<0>(v,2); // M0 contains {{1, 2, 3}, // {1, 2, 3}} Matrix M1 = spread<1>(v,2); // M1 contains {{1, 1}, // {2, 2}, // {3, 3}} \end{lstlisting} Note that \code{spread<1>(x,y.size())*spread<0>(y,x.size())} gives the same result as \code{outer\_product(x,y)}. \section{Conditional operations} \label{sec:conditional} There are two main ways to perform an operation on an array depending on the result of a boolean expression. The first is similar to the Fortran \code{where} construct: \begin{lstlisting} Matrix A(3,4); Matrix B(3,4); A.where(B > 0.0) = 2.0 * B; // Only assign to A if B > 0 A.where(!isnan(B)) = either_or(-B, 0.0); // Read from either one expression or the other \end{lstlisting} In the first example, \code{A} is only assigned if a condition is met, and therefore \code{A} must be of the same size and rank as the boolean expression. In the second example \code{A} is filled with elements from the first argument of \code{either\_or} if the boolean expression is \code{true}, or from the second argument otherwise; if \code{A} is empty then it will be resized to the size of the boolean expression. In both cases, the expressions on the right-hand-side may be scalars or array expressions of the same size as the boolean expression.
Equivalent expressions are possible replacing the assignment operator with the \code{+=}, \code{-=}, \code{*=} and \code{/=} operators, in which case \code{A} must already be the same size as the boolean expression. An alternative approach that works with only vectors uses the \code{find} function. This is similar to the equivalent Matlab function and returns an \code{IndexVector} (a vector of integers of sufficient precision to index an array) containing indices to the \code{true} elements of the vector: \begin{lstlisting} Vector v(10), w(10); v(find(v > 5.0)) = 3.0; IndexVector index = find(v > 5.0); v(index) = 2.0 * w(index); \end{lstlisting} This will work if no \code{true} elements are found: \code{find} will return an empty array, and when \code{v} is indexed by an empty vector, no action will be taken. In general, \code{find} is less efficient than \code{where}. \section{Fixed-size arrays} \label{sec:fixed} The size of the \code{Array} class is dynamic, which is somewhat sub-optimal for small arrays whose dimensions are known at compile time. \Adept\ provides an alternative class template for an array whose size is known at compile time and whose data are stored on the stack. It has the following declaration: \begin{lstlisting} namespace adept { template <typename Type, bool IsActive, int Dim0, int Dim1 = 0, ... > class FixedArray; } \end{lstlisting} The type (e.g.\ \code{double}) and activeness are specified by the first two template arguments, while the remaining template arguments provide the size of the dimensions, up to 7. Only as many sizes need to be specified as there are dimensions. A user working with arrays of a particular size could use \code{typedef} to provide convenient names; for example: \begin{lstlisting} typedef FixedArray<double,false,4> Vector4; typedef FixedArray<double,false,4,4> Matrix44; typedef FixedArray<double,true,4> aVector4; typedef FixedArray<double,true,4,4> aMatrix44; \end{lstlisting} In the \code{adept} namespace, \Adept\ defines \code{Vector2}, \code{Vector3}, \code{Matrix22}, \code{Matrix33} and their active counterparts.
Fixed arrays have all the same capabilities as dynamic arrays, with a few exceptions: \begin{itemize} \item Since their size is fixed, there are no member functions \code{resize}, \code{clear} or \code{in\_place\_transpose}. \item Since for the lifetime of the object it is associated with data on the stack, it cannot link to other data. This means that there is no member function \code{link}, and also if it is passed by value to a function then the contents of the array will be copied, rather than the behaviour of the \code{Array} class where the receiving function links to the original data. \end{itemize} All the same slicing operations are available as discussed in section \ref{sec:slice}, and they return the same types when applied to fixed arrays as they do when applied to dynamic arrays. Thus most operations return an \code{Array} object that links to a subset of the data within the \code{FixedArray} object. \section{Special square matrices} \label{sec:square} \Adept\ offers several special types of square matrix that can participate in array expressions. They are more efficient than \code{Array}s in certain operations such as matrix multiplication and assignment, but less efficient in operations such as accessing individual elements. All use an internal storage scheme compatible with BLAS (Basic Linear Algebra Subprograms). All are specializations of the \code{SpecialMatrix} class template, which has the following declaration: \begin{lstlisting} namespace adept { template <typename Type, class Engine, bool IsActive> class SpecialMatrix; } \end{lstlisting} The first template argument is the numerical type, the second provides the functionality specific to the type of matrix being simulated, and the third states whether the matrix participates in the differentiation of an algorithm. The specific types of special matrix are as follows: \begin{description} \item[Square matrices.] \code{SquareMatrix} provides a dense square matrix of type \code{Real} with \code{aSquareMatrix} its active counterpart.
Its functionality is similar to a rank-2 \code{Array}, except that its dimensions are always equal and the data along its fastest varying dimension are always contiguous in memory, which may make it faster than \code{Array} in some instances. \item[Symmetric matrices.] \code{SymmMatrix} provides a symmetric matrix of type \code{Real}, and \code{aSymmMatrix} is its active equivalent. Internally this type uses row-major unpacked storage with the data held in the lower triangle of the array and zeros in the upper triangle (equivalent to column-major storage with data in the upper triangle). If the opposite configuration is required then it is available by specifying different template arguments to the \code{SpecialMatrix} class template. Note that with normal access methods, the storage scheme is opaque to the user; for example, \code{S(1,2)=2.0} and \code{S(2,1)=2.0} have the same effect. \item[Triangular matrices.] \code{LowerMatrix} and \code{UpperMatrix} (and their active equivalents prefixed by ``\code{a}'') provide triangular matrices of type \code{Real}. Internally they use row-major unpacked storage, although column-major storage is available by specifying different template arguments to the \code{SpecialMatrix} class template. \item[Band diagonal matrices.] \code{DiagMatrix}, \code{TridiagMatrix} and \code{PentadiagMatrix} provide diagonal, tridiagonal and pentadiagonal \code{Real} matrices, respectively (with their active equivalents prefixed by ``\code{a}''). Internally, row-major BLAS-type band storage is used such that an $n$-by-$n$ tridiagonal matrix stores $3n$ rather than $n^2$ elements. \Adept\ supports arbitrary numbers of sub-diagonals and super-diagonals, accessible by specifying different template arguments to the \code{SpecialMatrix} class template.
\end{description} A \code{SpecialMatrix} can be constructed and resized as for \code{Array}s (see section \ref{sec:array}), with the following additions: \begin{lstlisting} SymmMatrix S(4); // Initialize a 4-by-4 symmetric matrix S.resize(5); // Resize to a 5-by-5 matrix \end{lstlisting} These are applicable to all types of \code{SpecialMatrix}. In terms of array indexing and slicing, the member functions \code{T}, \code{diag\_vector} and \code{submatrix\_on\_diagonal} described in section \ref{sec:slice} are all available, but if you index a \code{SpecialMatrix} with \code{S(a,b)} then \code{a} and \code{b} must be scalars or scalar expressions. For triangular or band-diagonal matrices, if the requested element is one of the zero parts of the matrix then it can only be used as an rvalue in an expression. If you wish to extract arbitrary subarrays from a \code{SpecialMatrix} then it must first be converted to a \code{Matrix}: \begin{lstlisting} SymmMatrix S(6); intVector index(3); index << 2, 3, 5; Matrix M = Matrix(S)(index,stride(0,4,2)); \end{lstlisting} \section{Matrix multiplication} \label{sec:matmul} Matrix multiplication may be invoked in two equivalent ways: using the \code{matmul} function or the ``\code{**}'' pseudo-operator. Following Fortran, the two arguments may be either rank-1 or rank-2, but at least one argument must be of rank-2. The orientation of any rank-1 argument is inferred from whether it is the first or second argument, as shown here: \begin{lstlisting} Matrix A(3,5), B(5,3), C; Vector v(5), w; C = matmul(A,B); // Matrix-matrix multiplication: return a 3x3 matrix w = matmul(v,B); // Interpret v as a row vector: return a vector of length 3 w = matmul(A,v); // Interpret v as a column vector: return a vector of length 3 \end{lstlisting} In this way it is never necessary to transpose a vector; the appropriate orientation to use is inferred from the context.
You may find it clearer to use ``\code{**}'' for matrix multiplication as illustrated here:\footnote{A drawback of the \code{**} interface with the orientation of vector arguments being inferred is that in an expression like \code{A**v**B} (where \code{A} and \code{B} are matrices and \code{v} is a vector), \code{v} is interpreted as a column vector in \code{A**v}, which returns a column vector result, but this result is then implicitly transposed when it is used as the left-hand argument of the matrix multiplication with \code{B}. Moreover, the order of precedence affects the result, since this expression will not give the same answer as \code{A**(v**B)}. % I may % consider introducing additional constraints and features in future % versions to require users to more explicitly state what they mean in % such situations, to reduce the chance of accidental mistakes. } \begin{lstlisting} Matrix A(3,5), B; SymmMatrix S(5); // 5-by-5 symmetric matrix Vector c, x(5); c = A ** log(S) ** x; // Returns a vector of length 3 c = matmul(matmul(A,log(S)),x); // Equivalent to the previous line but using matmul c = A ** (log(S) ** x); // As the previous example but more efficient B = 2.0 * S ** A.T(); // Returns a 5-by-3 matrix B = 2.0 * S ** A; // Run-time error: inner dimensions don't match \end{lstlisting} The ``\code{**}'' pseudo-operator has been implemented in \Adept\ by overloading the dereference operator such that ``\code{*A}'' returns a special type when applied to array expressions, and overloading the multiply operator to perform matrix multiplication when one of these types is on the right-hand-side. This means that \code{**} has the same precedence as ordinary multiplication, and both will be applied in order of left to right. Thus, in the first example above, matrix-matrix multiplication is performed followed by matrix-vector multiplication. 
The second example shows how to make this more efficient with parentheses to specify that the rightmost matrix multiplication should be applied first, leading to two matrix-vector multiplications. The final example shows an expression that would fail at runtime with an \code{inner\_dimension\_mismatch} exception due to the matrix multiplication being applied to matrices whose inner dimensions do not match. You cannot use \code{matmul} or ``\code{**}'' for vector-vector multiplication, since it is ambiguous whether you require the inner product (dot product) or the outer product. Therefore you must explicitly call the function \code{dot\_product} (section \ref{sec:reduce}) or \code{outer\_product} (section \ref{sec:expand}). In order to get the best performance, \Adept\ does not use expression templates for matrix multiplication but rather calls the appropriate level-2 BLAS function for matrix-vector multiplication and level-3 BLAS function for matrix-matrix multiplication. For matrix multiplication involving active vectors and matrices, \Adept\ first uses BLAS to perform the matrix multiplication and then stores the equivalent differential statements. There are therefore a few factors that users should be aware of in order to get the best performance: \begin{itemize} \item If an array expression rather than an array is provided as an argument to matrix multiplication, it will first be converted to an \code{Array} of the same rank. 
Therefore, if the same expression is used more than once in a sequence of matrix multiplications, better performance will be obtained by precomputing the array expression and storing it in a temporary matrix: \begin{lstlisting} Matrix A(5,5), B(5,5), C(5,5), D(5,5); // Slow implementation: C = transpose(2.0*A*B) ** (2.0*A*B); D = (2.0*A*B) ** C; // Faster implementation: { Matrix tmp = 2.0*A*B; C = tmp.T() ** tmp; D = tmp ** C; } // "tmp" goes out of scope here \end{lstlisting} \item If the left-hand argument of a matrix multiplication is a symmetric, triangular or band matrix then a specialist BLAS function will be used that is faster than the one for general dense matrices. \Adept\ may not be able to tell if the result of an array expression is symmetric, triangular or has a band structure, and so may not call the most efficient BLAS function. The user can help as follows: \begin{lstlisting} SymmMatrix S(5); Matrix A(5,5), B(5,5); B = (2.0*exp(S)) ** A; // Slower B = SymmMatrix(2.0*exp(S)) ** A; // Faster \end{lstlisting} \item BLAS requires that the fastest-varying dimension of input matrices are contiguous and increasing. This is always the case for the special square matrices described in section \ref{sec:square}, but not necessarily for a \code{Matrix} or an \code{aMatrix}, which are particular cases of the general \code{Array} type. If the fastest-varying dimension of such a matrix is not contiguous and increasing then \Adept\ will copy it to a temporary matrix before invoking matrix multiplications, as in the following example: \begin{lstlisting} Matrix A(5,5), B, C(5,5); B.link(A(__, stride(end,0,-1))); // Fastest varying dim is contiguous but decreasing C = A ** A; // Matrix multiplication applied directly with A C = B ** B; // Adept will copy B to a temporary matrix before multiplication \end{lstlisting} \end{itemize} An additional member function to mention in this section is \code{in\_place\_transpose()}, which is only applicable to matrices.
It transposes the matrix by swapping the dimensions and the offsets to each dimension, but leaving the actual data untouched. This means that a matrix with row-major storage will be changed to column-major, and vice versa. \Adept\ can differentiate expressions involving matrix multiplication, but this is far from optimal in \Adept\ version 2.0, for two reasons. Firstly, only differentiation of dense matrices has been implemented, so when matrix multiplication is applied to active ``special matrices'' (symmetric, band, upper-triangular and lower-triangular matrices), they are first copied to a dense matrix. Secondly, the \Adept\ stack format can currently only store differential statements for scalar expressions, which for matrix multiplication leads to lots of repeated values on the stack. A future version of \Adept\ will redesign the stack to allow matrices to be stored in it; this will be much faster and much less memory-hungry. \section{Linear algebra} \label{sec:la} \Adept\ provides the functions \code{solve} and \code{inv} to solve systems of linear equations and to invert a matrix, respectively, which themselves call the most appropriate function from LAPACK. \begin{lstlisting} Matrix A(5,5), Ainv(5,5), X(5,5), B(5,5); SymmMatrix S(5), Sinv(5); Vector x(5), b(5); Ainv = inv(A); // Invert general square matrices using LU decomposition Sinv = inv(S); // Invert symmetric matrices using Cholesky decomposition x = solve(A,b); // Solve general system of linear equations X = solve(S,B); // Solve symmetric system of linear equations with matrix right-hand-side \end{lstlisting} \iffalse As for matrix multiplication described in section \ref{sec:matmul}, if the arguments to \code{solve} and \code{inv} are not matrices with fastest-varying dimensions that are contiguous and increasing, then \Adept\ will first convert them to temporary matrices before performing the operation. \fi Statements involving \code{solve} and \code{inv} cannot yet be automatically differentiated.
When the \Adept\ stack is redesigned to hold matrices, this capability will be added. \section{Interpolation} \emph{Adept} supports linear and nearest-neighbour interpolation, in one, two and three dimensions via the \code{interp}, \code{interp2d} and \code{interp3d} functions. The example below shows how these functions are called and the size of the arguments, but does not fill the arguments with actual data (see the test program \code{test/test\_interp.cpp} for complete usage): % \begin{lstlisting} // Size of each dimension int nx, ny, nz; // Coordinate vectors of each dimension (must be monotonic) Vector x(nx), y(ny), z(nz); // Arrays to be interpolated Vector A1(nx); Matrix A2(ny,nx); Array3D A3(nz,ny,nx); // Number of points required int ni; // Locations of these points Vector xi(ni), yi(ni), zi(ni); // Output vector Vector v(ni); // Linear interpolation (default) v = interp(x,A1,xi); v = interp(x,A1,xi,ADEPT_INTERPOLATE_LINEAR); // Specifying scheme explicitly v = interp2d(y,x,A2,yi,xi); v = interp3d(z,y,x,A3,zi,yi,xi); // Nearest-neighbour interpolation v = interp(x,A1,xi,ADEPT_INTERPOLATE_NEAREST); v = interp2d(y,x,A2,yi,xi,ADEPT_INTERPOLATE_NEAREST); v = interp3d(z,y,x,A3,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST); \end{lstlisting} % Each interpolation function takes coordinate vectors describing each dimension of the interpolation array in the order of the dimensions of that array. In the two dimensional case, since matrices are indexed first by row ($y$ axis) then column ($x$ axis), this is the order they are shown here. The interpolation arrays (\code{A1}, \code{A2} and \code{A3} here) may have more dimensions than shown above; for each additional dimension, a further dimension is added to the output array, and effectively multiple arrays are interpolated at once. In this case, the coordinate vectors still refer to the first one, two or three dimensions of this array and the remaining (more rapidly varying in memory) dimensions come after. 
As can be seen from the listing above, an optional argument after the array arguments specifies the interpolation scheme to use, but this argument can also be used to specify the extrapolation policy to apply for requested points that lie outside of the interpolation array by using a bitwise-OR with one of the following: % \begin{description} \citem{ADEPT\_EXTRAPOLATE\_DEFAULT} Use the default extrapolation policy associated with the interpolation scheme (see below). Obviously this can be omitted. \citem{ADEPT\_EXTRAPOLATE\_LINEAR} Linear extrapolation; this is the default for linear interpolation, but is not available with nearest-neighbour interpolation. \citem{ADEPT\_EXTRAPOLATE\_CLAMP} Clamp the returned value at the nearest valid point in the interpolation array; this is the default for nearest-neighbour interpolation. \citem{ADEPT\_EXTRAPOLATE\_CONSTANT} Set outliers to a constant value provided by a further optional argument to the function, or \code{NaN} if no additional argument is provided. \end{description} For example: \begin{lstlisting} // Explicit selection of default behaviour (linear interpolation & extrapolation) v = interp(x,A1,xi,ADEPT_INTERPOLATE_LINEAR|ADEPT_EXTRAPOLATE_DEFAULT); // Nearest-neighbour interpolation with clamped extrapolation v = interp(x,A1,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP); // Nearest-neighbour interpolation, outliers set to NaN v = interp(x,A1,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT); // Linear interpolation, outliers set to zero v = interp(x,A1,xi,ADEPT_EXTRAPOLATE_CONSTANT, 0.0); \end{lstlisting} \section{Bounds and alias checking} \label{sec:bounds} When encountering an array or active expression, \Adept\ performs several checks to test the validity of the expression both at compile time and at runtime: \begin{description} \item[Activeness check.] An expression in which an active expression is assigned to an inactive array will fail to compile. \item[Rank check.]
An expression will fail to compile if the rank of the array on the left-hand-side of the ``\code{=}'' operator (or the operators ``\code{+=}'', ``\code{*=}'', etc.) does not match the rank of the array expression on the right-hand-side. However, a scalar (rank-0) expression can be assigned to an array of any rank; its value will be assigned to all elements of the array. Compile-time rank checks are also performed for each binary operation (binary operators such as ``\code{+}'' and binary functions such as \code{pow}) making up an array expression: compilation will fail if the two arguments do not have the same rank and neither is of rank 0. \item[Dimension check.] When a binary operation is applied to two array expressions of rank $n$ then \Adept\ checks at run-time that each of the $n$ dimensions has the same length. Otherwise, a \code{size\_mismatch} exception is thrown. \item[Alias check.] By default, \Adept\ checks to see whether the memory referenced in the array object on the left-hand-side of a statement overlaps with the memory referenced by any of the objects on the right-hand-side, as in this example of a shift-right operation: \begin{lstlisting} Vector v(6); v(range(1,end)) = v(range(0,end-1)); \end{lstlisting} In order to prevent the right-hand-side changing during the operation, \Adept\ copies the expression on the right-hand-side to a temporary array and then assigns the left-hand-side array to this temporary, which is equivalent to the following: \begin{lstlisting} { Vector tmp; tmp = v(range(0,end-1)); v(range(1,end)) = tmp; } // tmp goes out of scope here \end{lstlisting} However, for speed \Adept\ does not check to see whether individual memory locations are shared; rather the start and end memory locations are checked to see if they overlap. This means that for certain strided operations, copying to a temporary array is unnecessary. 
Nor is it necessary if elements of an array will be accessed in exactly the same order on the left-hand-side as the right-hand-side. If the user is sure that alias checking is not necessary then he or she can override alias checking for part or all of an array expression using the \code{noalias} function, as follows: \begin{lstlisting} v(stride(1,end,2)) = noalias(v(stride(0,end-1,2))); // No overlap between RHS and LHS v = 1.0 + noalias(exp(v)); // LHS & RHS accessed in same order \end{lstlisting} Note that for speed, alias checking is not performed if the left-hand-side is a \code{FixedArray}, since such arrays can never point to another location and therefore aliasing is less likely to arise. Aliasing is still possible if one of the terms on the right-hand-side points to the data in the \code{FixedArray} on the left. In this case, you can use the \code{eval} function, which takes a non-scalar expression as an argument, and returns an array containing a copy of the data. For example: \begin{lstlisting} FixedArray v = {1.0, 2.0, 3.0}; // C++11 initialization of inactive vector v = v(stride(end,0,-1)); // Aliasing leads to v = {3.0, 2.0, 3.0} v = eval(v(stride(end,0,-1))); // Expected result: v = {3.0, 2.0, 1.0} \end{lstlisting} To avoid the overhead of alias checking, you can define the preprocessor variable \code{ADEPT\_NO\_ALIAS\_CHECKING}, but then it is up to the user to identify the statements where aliasing will occur and use the \code{eval} function to ensure the correct behaviour. \item[Bounds check.] If the preprocessor variable \code{ADEPT\_BOUNDS\_CHECKING} is defined then additional run-time checks will be performed when an array is indexed or sliced using the methods described in section \ref{sec:slice}; if an index is out of bounds then an \code{index\_out\_of\_bounds} exception will be thrown. This makes indexing and slicing of arrays slower so would normally only be used for debugging.
\end{description} \section{Automatic differentiation capabilities specific to arrays} Section \ref{sec:adjoint} described how the \code{get\_gradient()} member function could be used to extract the gradients from a scalar \code{adouble} object after applying forward- or reverse-mode differentiation. In the same way, gradients may be extracted from active \code{Array} and \code{FixedArray} objects, returning an inactive \code{Array} of the same rank and size. For example, to compute the derivative of a \code{norm2} operation, we could do the following: \begin{lstlisting} Stack stack; // Stack to store differential statements aVector x = {1.0, 2.0, 3.0}; // C++11 initialization stack.new_recording(); // Clear any stored differential statements aReal y = norm2(x); // Perform operation to be differentiated y.set_gradient(1.0); // Seed the dependent variable stack.reverse(); // Reverse-mode differentiation Vector dy_dx = x.get_gradient(); // Extract vector of derivatives \end{lstlisting} \section{Array thread safety} \label{sec:thread} There are numerous ways of obtaining an \code{Array} that links to data in another \code{Array} object; not only the ``\code{>>=}'' link operator described in section \ref{sec:array}, but also the various subsetting member functions described in section \ref{sec:slice}, and even just passing arrays to and from functions. This avoids deep copying and so improves efficiency. In addition to the new \code{Array} pointing to the same data, it also points to the same \code{Storage} object, and when a new link is created, the counter in this object indicating the number of objects pointing to it is incremented. This ensures that the data will remain provided there is at least one object linking to it. A downside of this model is that if multiple threads access an array simultaneously, even if just to read it, then the reference counter can become corrupted. There are two solutions to this problem.
\cxx11 \begin{leftbar} If you are using C++11 then you can define the \code{ADEPT\_STORAGE\_THREAD\_SAFE} preprocessor variable, which makes the reference counter in \code{Storage} objects of type \code{std::atomic} and thereby protects all operations on them by a mutex. This may degrade the efficiency of your code since the mutex will be redundant in single-threaded code. \end{leftbar} Alternatively, we use the capability of arrays to access data not held in a \code{Storage} object. The \code{Array} and \code{SpecialMatrix} classes have a \code{soft\_link()} member function that returns an object of the same type, size and activeness, which points to the same data but does not contain a link to the \code{Storage} object: \begin{lstlisting} Matrix M(2,2); // ...enter multi-threaded environment Matrix N; N >>= M.soft_link(); // N links to same data as M but without Storage object Vector v = M.soft_link()(__,0); // v links to subset of M but without Storage object // (recall that the copy constructor is called here) \end{lstlisting} The linked objects may be used in the same way as any other \code{Array}. This is demonstrated in the \code{test\_thread\_safe\_arrays} test program. 
\section{Writing an array to a stream} As you would expect, an array can be written to a stream with the ``\code{<<}'' operator: \begin{lstlisting} Vector v = {1, 2}; // Using C++11 initializer lists Matrix M = {{3, 4}, {5, 6}}; // for convenience std::cout << v << "\n"; std::cout << M << "\n"; \end{lstlisting} which by default produces \begin{lstlisting} 1 2 3 4 5 6 \end{lstlisting} You can change the output to use curly brackets to indicate the dimensions of the array as follows: \begin{lstlisting} Vector v = {1, 2}; Matrix M = {{3, 4}, {5, 6}}; adept::set_array_print_style(PRINT_STYLE_CURLY); std::cout << "v = " << v << ";\n"; std::cout << "M = " << M << ";\n"; \end{lstlisting} which produces output that looks like C/C++ code: \begin{lstlisting} v = {1, 2}; M = {{3, 4}, {5, 6}}; \end{lstlisting} The available print styles for use by \code{set\_array\_print\_style} are \code{PRINT\_STYLE\_PLAIN} (default), \code{PRINT\_STYLE\_CURLY}, \code{PRINT\_STYLE\_CSV} (comma-separated values) and \code{PRINT\_STYLE\_MATLAB} (matrix ordering indicated by Matlab-style semi-colons and square brackets). \section{Fortran interoperability} \label{sec:fortran} The traditional way to pass arrays between Fortran and C/C++ makes use of the fact that Fortran passes its ``explicit-shape'' arrays (the type used since Fortran-77) to and from routines simply as a pointer to the first element of the array. It is then up to the receiving routine to declare the size of the array correctly. \Adept\ arrays can therefore be passed to Fortran routines using their \code{data()} and \code{const\_data()} member functions, which return pointers to the first element of the array. Since Fortran-90, the language also supports ``assumed-shape'' arrays, which are very much like \Adept's \code{Array} objects: they contain within them the extent of each array dimension, and may refer to data that are strided (non-contiguous) in memory. 
Fortran passes an assumed-shape array to subroutines and functions in the form of a pointer to its \emph{array descriptor} (sometimes known as a \emph{dope vector}), which contains a pointer to the first element of the array and information on the rank, type, and the extent and stride-in-memory of each dimension. The Fortran 2018 standard defines an interface to allow assumed-shape arrays to be passed to and from C or C++ functions. Fortran compilers supporting this standard provide a C/C++ header file \code{ISO\_Fortran\_binding.h} that defines the array descriptor as a structure \code{CFI\_cdesc\_t}. The \Adept\ header file \code{adept\_fortran.h} provides a class \code{adept::FortranArray}, a thin wrapper to this structure, that enables an \Adept\ \code{Array} object to share its data with a Fortran array. This is very efficient as only the array descriptor information is copied, not the actual data in the array. At the time of writing, support for this capability in Fortran compilers is limited. A crucial point to be aware of in all the examples that follow is that \Adept\ indexes its arrays in row-major order starting at 0, while Fortran indexes its arrays in column-major order starting (by default) at 1. When arrays are passed between the two languages, the native array convention is adopted. Therefore, matrix element \code{A(0,10)} in \Adept\ would be indexed as \code{A(11,1)} in Fortran. \subsection{Passing arrays from C++/Adept to Fortran} Suppose we have a Fortran subroutine that takes an integer array and a single-precision array as arguments: \begin{lstlisting}[language=Fortran] ! Define a routine callable with same name in C/C++ subroutine fortran_routine(int_array, flt_array) bind(c) implicit none integer(kind=4), intent(inout) :: int_array(:,:) ! Matrix of 4-byte integers real(kind=4), intent(inout) :: flt_array(:,:) ! Matrix of 4-byte real numbers !
--- Body of routine here --- end subroutine fortran_routine \end{lstlisting} The following C++ program demonstrates how \Adept\ arrays can be passed to this routine: \begin{lstlisting} #include <adept_fortran.h> // Declare interface to the routine, turning off C++ name mangling so that it can be linked // to Fortran extern "C" void fortran_routine(adept::FortranArray* int_array, adept::FortranArray* flt_array); int main() { // Initialize Adept matrices, using shortcuts to the types Array<2,int> and Array<2,float> adept::intMatrix int_arr = {{2, 3, 5}, {7, 11, 13}}; adept::floatMatrix flt_arr = {{2.0, 3.0, 5.0}, {7.0, 11.0, 13.0}}; // Convert Adept arrays to Fortran arrays pointing to the same data, and call the routine; // the conversion to FortranArray pointers is done automatically fortran_routine(adept::FortranArray(int_arr), adept::FortranArray(flt_arr)); return 0; } \end{lstlisting} This will fail to compile if the \code{ISO\_Fortran\_binding.h} file is not found. To link the two object files into an executable you will need to use your C++ compiler, but include the relevant Fortran library on the command line (e.g.\ \code{-lgfortran} if you compiled \code{fortran\_routine} with the GNU Fortran compiler, or \code{-lifcore} if you used the Intel Fortran compiler). \subsection{Passing arrays from Fortran to C++/Adept} We can also pass arrays the other way. Consider the following Fortran program: \begin{lstlisting}[language=Fortran] program test_interoperability implicit none ! Define interface to a function implemented in C++ interface subroutine adept_routine(int_array, flt_array) bind(c) integer(kind=4), intent(inout) :: int_array(:,:) ! Matrix of 4-byte integers real(kind=4), intent(inout) :: flt_array(:,:) ! Matrix of 4-byte real numbers end subroutine adept_routine end interface ! Body of program starts here integer(kind=4), allocatable :: imat(:,:) real(kind=4), allocatable :: fmat(:,:) ! --- Code to allocate and populate imat and fmat here --- !
Now call the C++ function call adept_routine(imat, fmat) end program test_interoperability \end{lstlisting} The routine could be implemented in C++ as follows: \begin{lstlisting} #include <adept_fortran.h> extern "C" void adept_routine(adept::FortranArray* int_array, adept::FortranArray* flt_array) { // Declare Adept arrays adept::intMatrix int_arr; adept::floatMatrix flt_arr; // Associate Adept arrays with Fortran data, or throw a fortran_interoperability_error // exception if the rank or type do not match int_arr >>= int_array; flt_arr >>= flt_array; // --- Operations on int_arr and flt_arr now modify the Fortran arrays --- } \end{lstlisting} Since the executable now contains a Fortran source file with a \code{program} statement, rather than a C++ source file defining a \code{main} function, the linking step of the compilation must be carried out using the Fortran compiler, but passing it the C++ standard library, i.e.\ \code{-lstdc++}. In the example above, the \Adept\ arrays \code{int\_arr} and \code{flt\_arr} behave in the same way as ``linked'' arrays described in section \ref{sec:array}: they know that they do not ``own'' the original data, so if the user then calls their \code{clear} or \code{resize} member functions, they will unlink themselves from the Fortran arrays. The \code{FortranArray} class provides no array features itself, so must be linked to an \code{Array} object before any work can be done on it, but it does provide a handful of member functions for querying its properties: \begin{description} \citem{int rank()} Return the number of dimensions of the array. \citem{int dimension(int i)} Return the extent of dimension \code{i} in memory, counting dimensions from 0 but using the Fortran ordering. \citem{int offset(int i)} Return the stride in memory of dimension \code{i}. \citem{bool is\_type()} Return \code{true} if the element type of the array is the same as \code{Type} (which must be a known type at compile time).
\citem{Type* data()} Return a pointer to the first element of the data, cast to the specified type. \end{description} \chapter{Using \Adept's optimization functionality} \label{chap:optimize} \section{Background} \label{sec:optimize} Since version 2.0.8, \Adept\ provides functionality for solving non-linear optimization problems, specifically finding the state vector ${\bf x}$ that minimizes the scalar cost function $J({\bf x})$ (also known as a penalty function or objective function). % A \emph{gradient-free} minimization algorithm (e.g.\ Nelder-Mead) requires simply a user-supplied function for computing $J$, calling it multiple times for different ${\bf x}$ to find the minimum $J$. % A \emph{first-order} minimization algorithm requires also a user-supplied function returning the gradient of the cost function $\partial J/{\partial\bf x}$ (a vector). Examples are the Conjugate Gradient method and the Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method. Knowing the gradient enables such algorithms to find the minimum with far fewer function calls, although a function call returning $\partial J/{\partial\bf x}$ is slower than one returning only $J$. \Adept's optimization interface is in terms of passive array types, so the user is not obliged to use \Adept's automatic differentiation capability to compute these gradients, although the examples in this chapter assume that they do. % A \emph{second-order} minimization algorithm makes use of not only $J$ and $\partial J/{\partial\bf x}$, but also a user-supplied function for the Hessian ${\bf A}=\partial^2J({\bf x})/\partial{\bf x}^2=\nabla_{\bf x}^2J$ (a symmetric matrix), or an approximation of it. Examples are the Gauss-Newton and Levenberg-Marquardt methods. Knowing the second derivative means that even fewer iterations should be required to find the minimum of $J$, but ${\bf A}$ is more expensive to compute than $\partial J/{\partial\bf x}$. 
\Adept\ does not have the ability to automatically compute Hessian matrices for an arbitrary cost function, but frequently the cost function has a specific form that makes it possible to compute the approximate Hessian from the Jacobian matrix. Consider the optimization problem of finding the parameters $\x$ of non-linear model $\y(\x)$ that provides the closest match to a set of ``observations'' $\y^o$ in a least-squares sense. For maximum generality we add constraints that penalize differences between $\x$ and a set of \emph{a~priori} values $\x^a$, as well as a regularization term. In this case the cost function could be written as \def\myspace{~~} \begin{equation} J(\x) \myspace =\myspace \frac12\left[\y(\x)-\y^o\right]^\mathrm{T}{\bf R}^{-1}\left[\y(\x)-\y^o\right] \myspace+\myspace\frac12\left[\x-\x^a\right]^\mathrm{T}{\bf B}^{-1}\left[\x-\x^a\right] \myspace+\myspace\frac12\x^\mathrm{T}{\bf T}\x. \label{eq:objective} \end{equation} Here, all vectors are treated as column vectors, ${\bf R}$ is the error covariance matrix of the observations, ${\bf B}$ is the error covariance matrix of the \emph{a~priori} values, and ${\bf T}$ is a Twomey-Tikhonov matrix that penalizes either spatial gradients or curvature in $\x$. The approximate Hessian matrix is then given by \begin{equation} {\bf A} \myspace\simeq\myspace {\bf H}^\mathrm{T}{\bf R}^{-1}{\bf H} \myspace+\myspace {\bf B}^{-1} \myspace+\myspace {\bf T}, \label{eq:hessian} \end{equation} which can be coded up using \Adept\ to compute the Jacobian matrix ${\bf H}=\partial\y/\partial\x$. Each term on the right-hand-side of (\ref{eq:hessian}) has its corresponding term in (\ref{eq:objective}), so it is easy to work out what the Hessian would look like if only a subset of the terms in (\ref{eq:objective}) were present. The first term of (\ref{eq:hessian}) is the `Gauss-Newton' approximation of the true Hessian of the first term of (\ref{eq:objective}).
It is exact if $\y(\x)$ is linear, i.e.\ if each element of $\y$ could be represented as a linear combination of the elements of $\x$. In many cases this is a good enough approximation of the Hessian for fast convergence to be achieved. \section{\Adept\ interface} \label{sec:minimizer_interface} For the purposes of demonstrating how this would be implemented in \Adept\ we simplify (\ref{eq:objective}) down to the case of minimizing a quadratic function, in which case $J={\bf y}^\mathrm{T}{\bf y}/2$ and ${\bf y}={\bf x}$. The former of these two equations means that the Hessian matrix is simply ${\bf A}={\bf H}^\mathrm{T}{\bf H}$. The latter we implement using active variables: % \begin{lstlisting} adept::aVector calc_y(const adept::aVector& x) { return x; } \end{lstlisting} % The test program \code{test/test\_minimizer.cpp} uses a ${\bf y}({\bf x})$ function for the much more interesting case of the $N$-dimensional Rosenbrock function. To set up the problem ready for minimizing, we create a class that derives from \Adept's \code{Optimizable} class and overrides five of its virtual functions: % \begin{lstlisting} // Include this header file for the functionality described in this chapter #include <adept_optimize.h> class SimpleOptimizable : public adept::Optimizable { public: // Return the cost function for a given state vector x virtual adept::Real calc_cost_function(const adept::Vector& x) { adept::Vector y = value(calc_y(x)); // "value" throws away the activeness return 0.5*sum(y*y); } // Calculate the cost function and its gradient from x virtual adept::Real calc_cost_function_gradient(const adept::Vector& x, adept::Vector gradient) { adept::aVector xactive = x; // Copy x to an active variable stack.new_recording(); adept::aVector y = calc_y(xactive); // Calculate y from x adept::aReal cost = 0.5*sum(y*y); // Calculate cost function as an active variable cost.set_gradient(1.0); // Use reverse-mode differentiation to stack.reverse(); // compute the gradient gradient =
xactive.get_gradient(); return value(cost); // Return cost function as passive variable } // Calculate the cost function, its gradient and the approximate Hessian matrix virtual adept::Real calc_cost_function_gradient_hessian(const adept::Vector& x, adept::Vector gradient, adept::SymmMatrix& hessian) { adept::aVector xactive = x; // Copy x to an active variable stack.new_recording(); adept::aVector y = calc_y(xactive); // Calculate y from x adept::aReal cost = 0.5*sum(y*y); // Calculate cost function as an active variable stack.independent(xactive); // Define independent variables stack.dependent(y); // Define dependent variables adept::Matrix jac = stack.jacobian(); // Compute Jacobian matrix dy/dx hessian = jac.T() ** jac; // Hessian is a simple matrix product of Jacobian gradient = jac.T() ** value(y); // Gradient is a matrix-vector product return value(cost); // Return cost function as passive variable } // Every iteration this function is called: here simply report progress to standard output virtual void report_progress(int niter, const adept::Vector& x, adept::Real cost, adept::Real gnorm) { std::cout << "Iteration " << niter << ": cost function = " << cost << "\n"; } // Minimization algorithm may want to check what derivatives are available: here we // provide 0th (cost function alone) 1st (gradient) and 2nd (Hessian), so return true // for 0, 1 or 2, false otherwise virtual bool provides_derivative(int order) { return (order >= 0 && order <= 2); } // Keep an instance of the Adept stack within the class: avoids the initialization costs // incurred each iteration if it was inside calc_cost_function_gradient and // calc_cost_function_gradient_hessian private: adept::Stack stack; }; \end{lstlisting} Note that if you plan to use a first-order minimization algorithm, you do not need to provide a \code{calc\_cost\_function\_gradient\_hessian} function. 
\Adept's \code{Minimizer} class can minimize the cost function held in an \code{Optimizable} object by calling the user-supplied virtual functions, as follows: \begin{lstlisting} SimpleOptimizable quadratic_function; adept::Minimizer minimizer(MINIMIZER_ALGORITHM_LEVENBERG); // Select minimization algorithm int nx = 10; // Number of state variables adept::Vector x(nx); // Declare state vector x = 3.0; // Initialize state vector to first guess values, all 3.0 // Minimize the cost function: adept::MinimizerStatus status = minimizer.minimize(quadratic_function, x); // Report the convergence status: std::cout << "Convergence status: " << adept::minimizer_status_string(status) << "\n"; \end{lstlisting} % After the \code{minimize} member function is called, \code{x} contains the state vector that minimizes the cost function. The available minimization algorithms are: \begin{description} \citem{MINIMIZER\_ALGORITHM\_CONJUGATE\_GRADIENT} The first-order Conjugate-Gradient algorithm performs a line search along the steepest-descent direction, then uses the Polak-Ribi\`ere formula to compute subsequent search directions that are conjugate to the previous $N$ directions, where $N$ is the number of state variables. The Conjugate-Gradient method is the most memory efficient, so suitable for problems with large $N$. The line search first brackets the minimum then fits a cubic polynomial to the values and gradients at the bounding points to find the best estimate of the next search point. The Wolfe conditions are applied to determine whether the cost function along the search direction has been sufficiently minimized. % \citem{MINIMIZER\_ALGORITHM\_CONJUGATE\_GRADIENT\_FR} As above but using the Fletcher-Reeves formula to compute new search directions. 
% \citem{MINIMIZER\_ALGORITHM\_LIMITED\_MEMORY\_BFGS} The first-order Limited-Memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) algorithm uses a limited number of previous search directions (default 6) to build up an approximation to the inverse of the Hessian matrix, enabling it to make a better estimate of the location of the minimum of the cost function, but with a slightly higher memory footprint. Note that the full inverse Hessian is not computed explicitly so this method is still efficient in memory for large $N$. % \citem{MINIMIZER\_ALGORITHM\_LEVENBERG} The second-order Levenberg algorithm tries to perform a Gauss-Newton step using the approximate Hessian matrix and assuming that the curvature of $J$ is locally constant. If $J$ at the new ${\bf x}$ is not reduced by this step then a damping parameter $\lambda$ is used to scale between Gauss-Newton and a steepest-descent algorithm. % \citem{MINIMIZER\_ALGORITHM\_LEVENBERG\_MARQUARDT} The second-order Levenberg-Marquardt algorithm is similar to the Levenberg algorithm, but scales such that the step sizes are changed in each dimension according to the curvature of the cost function in that dimension (i.e.\ the diagonal of the Hessian matrix). This tends to result in faster convergence than the Levenberg algorithm for problems with very different scaling for each element of the state vector. \end{description} The minimizer can be configured in detail by calling its member functions listed in section \ref{sec:minimizer_options}. The possible values for the return status are given in section \ref{sec:minimizer_status}. The case above is an example of \emph{unconstrained minimization}: the minimizer is free to try any values of ${\bf x}$. This can lead to it trying unphysical values, such as negative values for a quantity that cannot be negative. To prevent this, all the minimization algorithms allow the user to specify simple box constraints on the elements of the state vector. 
Suppose we wanted to constrain element 0 to be positive and element 1 to lie in the range 10--20, we would add these lines: \begin{lstlisting} // Declare vectors containing the lower and upper bounds on x adept::Vector x_lower, x_upper; // Set them to the minimum and maximum possible values for their element type (e.g. double) adept::minimizer_initialize_bounds(nx, x_lower, x_upper); // Set a lower bound on element 0 and both bounds on element 1 x_lower(0) = 0.0; x_lower(1) = 10.0; x_upper(1) = 20.0; // Call the minimize function with two extra arguments specifying the bounds status = minimizer.minimize(quadratic_function, x, x_lower, x_upper); \end{lstlisting} % minimizer.set_max_iterations(100); % minimizer.set_converged_gradient_norm(0.1); \section{Other member functions of the \code{Minimizer} class} \label{sec:minimizer_options} In addition to the \code{minimize} member function described in section \ref{sec:minimizer_interface}, the following \code{adept::Minimizer} member functions may be called to configure the behaviour of the minimization algorithm: \begin{description} \citem{void set\_algorithm(MinimizerAlgorithm algo)} Set the algorithm to one of the available minimization algorithms, e.g.\ \code{MINIMIZER\_ALGORITHM\_LEVENBERG}. This is an alternative to providing it as an argument to the \code{Minimizer} constructor. % \citem{void set\_algorithm(const std::string\&\ algo)} Set the algorithm using a string, which may be one of ``\code{L-BFGS}'', ``\code{Conjugate-Gradient}'', ``\code{Conjugate-Gradient-FR}'', ``\code{Levenberg}'' or ``\code{Levenberg-Marquardt}''. Note that this function is case-insensitive, and will also accept spaces or underscores in place of hyphens. % \citem{void set\_max\_iterations(int max\_it)} Set the maximum number of iterations (default 100).
% \citem{void set\_converged\_gradient\_norm(Real cgn)} The L2-norm of the $\partial J/\partial{\bf x}$ vector is computed each iteration, and convergence is deemed to have been achieved when it falls below the value specified here (default 0.1). % \citem{void set\_max\_step\_size(Real mss)} Set the maximum step size each iteration (default: no maximum). A negative or zero value indicates that no maximum step size is to be used. % \citem{void ensure\_updated\_state(int order = 2)} Often the user will require the Hessian matrix to compute errors in the solution, and will store the Hessian matrix each time the \code{calc\_cost\_function\_gradient\_hessian} function is called. However, by default there is no guarantee that when the minimization has completed this function will have been called with the final version of the state vector. Calling the member function here requests that after minimization is complete, the derivatives of at least the specified \code{order} are consistent with the final state vector (e.g.\ 2 for both the Hessian and the gradient vector). % \citem{set\_max\_line\_search\_iterations(int mi)} Set the maximum number of iterations to perform in a line search (default 10). The same value is used by the Conjugate-Gradient and L-BFGS methods. % \citem{set\_armijo\_coeff(Real ac)} The first of the Wolfe conditions determines how much of a decrease in the cost function is satisfactory for a line search to complete, controlled by the Armijo coefficient (default $10^{-4}$). The same value is used by the Conjugate-Gradient and L-BFGS methods. % \citem{set\_lbfgs\_curvature\_coeff(Real lcc)} The second Wolfe condition is that the magnitude of the gradient in the search direction is reduced by a certain amount determined by the curvature coefficient, the optimum value of which is different for the Conjugate-Gradient and L-BFGS methods. The default for the L-BFGS method is 0.9. 
% \citem{set\_cg\_curvature\_coeff(Real cgcc)} The curvature coefficient to use for the Conjugate-Gradient method (default 0.1). % \citem{void set\_levenberg\_damping\_limits(Real damp\_min, Real damp\_max)} Set the minimum and maximum positive values of the damping parameter $\lambda$ used by both the Levenberg and Levenberg-Marquardt algorithms (default $1/128$ and $10^5$). Internally the algorithm can still use zero when each iteration is reducing the cost function. % \citem{void set\_levenberg\_damping\_start(Real damp\_start)} Set the initial damping factor for the Levenberg and Levenberg-Marquardt algorithms (default 0). % \citem{void set\_levenberg\_damping\_restart(Real damp\_restart)} Set the value of the damping factor $\lambda$ in the Levenberg and Levenberg-Marquardt algorithms that is used when a value of $\lambda=0$ does not result in the cost function being reduced (default $1/4$). % \citem{void set\_levenberg\_damping\_multiplier(Real damp\_multiply, Real damp\_divide)} Set the multiplier and divider that will be used to scale the damping factor when an iteration does not and does reduce the cost function, respectively (default 2.0 and 5.0). \end{description} The following member functions return the minimizer algorithm that the \code{Minimizer} is currently configured to use: \begin{description} \citem{MinimizerAlgorithm algorithm()} Return the enumeration representing the minimization algorithm. \citem{std::string algorithm\_name()} Return a string representing the minimization algorithm. \end{description} The following member functions extract information about the progress of the minimization after it has completed: \begin{description} \citem{int n\_iterations()} Return the number of iterations performed. Only iterations that successfully reduced the cost function are counted. % \citem{int n\_samples()} Return the number of times the cost function was computed, including times when this did not reduce the cost function. 
% \citem{Real cost\_function()} Return the final value of the cost function. % \citem{Real gradient\_norm()} Return the final value of the norm of the $\partial J/\partial{\bf x}$ vector. % \citem{Real start\_cost\_function()} Return the cost function for the first guess of the state vector provided by the user. % \citem{MinimizerStatus status()} Return the convergence status. \end{description} \section{Return status for minimization} \label{sec:minimizer_status} The following enumerations may be returned by \code{Minimizer}'s \code{minimize} member function representing the status of the minimization. The \code{adept::minimizer\_status\_string} function converts a status to a user-readable string, as demonstrated in one of the examples in section \ref{sec:minimizer_interface}. \begin{description} \citem{MINIMIZER\_STATUS\_SUCCESS} Minimization was successful. % \citem{MINIMIZER\_STATUS\_EMPTY\_STATE} The state vector provided is empty. % \citem{MINIMIZER\_STATUS\_MAX\_ITERATIONS\_REACHED} The maximum number of iterations was reached. % \citem{MINIMIZER\_STATUS\_FAILED\_TO\_CONVERGE} Convergence was not achieved, even though some progress may have been made in minimizing the cost function. This usually occurs when, in the vicinity of the minimum, the $J({\bf x})$ terrain is quite flat and numerical errors mean that the gradient returned from the user-supplied function does not point uphill as it should. This means that when the algorithm uses the gradient to try to go downhill it finds the cost function increasing. % \citem{MINIMIZER\_STATUS\_INVALID\_COST\_FUNCTION} The cost function returned is NaN or infinity. This is usually solved by using bounded minimization to ensure that the values of ${\bf x}$ are kept within physically reasonable bounds. \citem{MINIMIZER\_STATUS\_INVALID\_GRADIENT} The gradient vector returned contains NaN or infinity values. Use bounded minimization. 
% \citem{MINIMIZER\_STATUS\_INVALID\_BOUNDS} The bounds requested are not valid, for instance a maximum bound was requested that is less than the minimum bound. %\citem{MINIMIZER\_STATUS\_NUMBER\_AVAILABLE} %\citem{MINIMIZER\_STATUS\_NOT\_YET\_CONVERGED \end{description} \chapter{General considerations} \label{chap:gen} \section{Setting and checking the global configuration} \label{sec:settings} \noindent The following non-member functions are provided in the \code{adept} namespace: \begin{description} \citem{std::string version()} Returns a string containing the version number of the \Adept\ library (e.g. ``\code{2.0.8}''). \citem{std::string compiler\_version()} Returns a string containing the compiler name and version used to compile the \Adept\ library. \citem{std::string compiler\_flags()} Returns a string containing the compiler flags used when compiling the \Adept\ library. \citem{std::string configuration()} Returns a multi-line string listing numerous aspects of the way \Adept\ has been configured. \citem{bool have\_matrix\_multiplication()} Returns \code{true} if the Adept library has been compiled with BLAS support, \code{false} otherwise. \citem{bool have\_linear\_algebra()} Returns \code{true} if the Adept library has been compiled with LAPACK support, \code{false} otherwise. \citem{int set\_max\_blas\_threads(int n)} Set the maximum number of threads used for matrix operations by the BLAS library, or zero to use the upper limit on your system. The number returned is the number actually used. \citem{int max\_blas\_threads()} Return the maximum number of threads available for matrix operations by the BLAS library. % \end{description} The preprocessor can detect the \Adept\ version at compile time via the \code{ADEPT\_VERSION} preprocessor variable, which is an integer variable with the digits $abbcc$ corresponding to \Adept\ version $a.bb.cc$. 
This could be used to activate a different compile path dependent on the version, or even to fail to compile if the version is not recent enough: \begin{lstlisting} #if ADEPT_VERSION < 10910 #error "Adept >= 1.9.10 is required by this program" #endif \end{lstlisting} \section{Parallelizing \Adept\ programs} \Adept\ currently has limited built-in support for parallelization. If the algorithms that you wish to differentiate are individually small enough to be treated by a single processor core, and you wish to differentiate multiple algorithms independently (or the same algorithm but with multiple sets of inputs) then parallelization is straightforward. This is because the global variable containing a pointer to the \Adept\ stack uses thread-local storage. This means that if a process spawns multiple threads (e.g.\ using OpenMP or Pthreads) then each thread can declare one \code{adept::Stack} object and all \code{adouble} operations will result in statements being stored on the stack object specific to that thread. The \Adept\ package contains a test program \code{test\_thread\_safe} that demonstrates this approach in OpenMP. If your problem is larger and you wish to use parallelism to speed-up the differentiation of a single large algorithm then the built-in support is more limited. Provided your program and the \Adept\ library were compiled with OpenMP enabled (which is the default for the \Adept\ library if your compiler supports OpenMP), the computation of Jacobian matrices will be parallelized. By default, the maximum number of concurrent threads will be equal to the number of available cores, but this can be overridden with the \code{set\_max\_jacobian\_threads} member function of the \code{Stack} class. 
Note that the opportunity for speed-up depends on the size of your Jacobian matrix: for an $m\times n$ matrix, the number of independent passes through the stored data is $\mathrm{min}(m,n)$ and each thread treats \code{ADEPT\_MULTIPASS\_SIZE} of them (see section \ref{sec:configuring_lib}), so the maximum number of threads that can be exploited is $\mathrm{min}(m,n)/$\code{ADEPT\_MULTIPASS\_SIZE}. Again, the \code{test\_thread\_safe} program can demonstrate the parallelization of Jacobian calculations. Note, however, that if the \code{jacobian} function is called from within an OpenMP thread (e.g.\ if the program already uses OpenMP with each thread containing its own \code{adept::Stack} object), then the program is likely not to be able to spawn more threads to assist with the Jacobian calculation. If you need Jacobian matrices then the ability to parallelize the calculation of them is useful since this tends to be more computationally costly than recording the original algorithm. If you only require the tangent-linear or adjoint calculations (equivalent to a Jacobian calculation with $n=1$ or $m=1$, respectively), then unfortunately you are stuck with single threading. It is intended that a future version of \Adept\ will enable all aspects of differentiating an algorithm to be parallelized with either or both of OpenMP and MPI. If your BLAS library has support for parallelization then be aware that the performance may be poor if other parts of the program are parallelized. This occurs with OpenBLAS, which uses Pthreads, if you also use parallelized Jacobian calculations, which use OpenMP. In this instance you can turn off parallelization of array operations with the \code{set\_max\_blas\_threads(1)} function in the \code{adept} namespace. The number of available threads for array operations is returned by the \code{max\_blas\_threads()} function. 
Alternatively, you can use the \code{OPENBLAS\_NUM\_THREADS} environment variable to control the number of threads used by OpenBLAS, and the \code{OMP\_NUM\_THREADS} environment variable to control the number used in Jacobian calculations. \section{The fast exponential function} \label{sec:fastexp} \Adept\ was originally developed for algorithms that make frequent calls to the exponential function \code{exp}, but unfortunately most compilers do not vectorize \code{exp}. Therefore, \Adept\ provides the function \code{fastexp} in the \code{adept} namespace, which can operate on active and passive scalars and array arguments (including the simple \code{float} and \code{double}) just like \code{exp}. It uses an adapted form of an algorithm from Agner Fog's Vector Class Library (VCL) that is around a third faster for scalar arguments, but can be vectorized making it as much as 10 times faster when applied to \Adept\ arrays depending on the instruction set available. It is accurate but not bit-reproducible with \code{exp} and produces finite results for a slightly smaller range of input values: from $-87.3$ to $+89.0$ for \code{float} arguments and from $-708.39$ to $+709.70$ for \code{double} arguments. If you have an existing code that calls \code{exp} with \Adept\ types as arguments, and wish to use the faster algorithm for all of them, then simply compile your code with \code{-DADEPT\_FAST\_EXPONENTIAL}. This will not change the behaviour of \code{exp} for other types of arguments, which would typically use the version from the C++ standard library. If you compile your code with \code{-DADEPT\_FAST\_SCALAR\_EXPONENTIAL} then a fast exponential function \code{adept::exp} will be defined that works on arguments of type \code{float} and \code{double}. However, this can cause a namespace clash as some C header files import \code{exp} from the standard library outside of any namespace. 
\section{Tips for the best performance} \label{sec:tips} \begin{itemize} \item If you are working with single-threaded code, or in a multi-threaded program but with only one thread using a Stack object, then you can get slightly faster code by compiling all of your code with \code{-DADEPT\_STACK\_THREAD\_UNSAFE}. This uses a standard (i.e. non-thread-local) global variable to point to the currently active stack object, which is slightly faster to access. \item If you compile with the \code{-g} option to store debugging symbols, your object files and executable will be much larger because every mathematical statement in the file will have the name of its associated templated type stored in the file, and these names can be long. Once you have debugged your code, you may wish to omit debugging symbols from production versions of the executable, or reduce the level of detail with \code{-g1} (on the GNU C++ compiler). There is typically no performance penalty associated with including debugging symbols. \item A high compiler optimization setting is recommended to inline the function calls associated with mathematical expressions. On the GNU C++ compiler, the \code{-O3 -march=native} setting is recommended. \item As outlined in the previous section, if you use the \code{exp} function then you can replace calls to it with the faster \code{fastexp} function, or compile your code with \code{-DADEPT\_FAST\_EXPONENTIAL}. 
\item On Intel and ARM architectures, \Adept\ will use the SSE2, AVX, AVX512 or NEON instruction sets (depending on availability) to vectorize array expressions that satisfy a number of requirements: (1) they contain only elementary mathematical operators (including the functions \code{sqrt}, \code{max}, \code{min} and \code{fastexp}), (2) the arrays in the expression are either all of type \code{float} or all of type \code{double}, (3) all the arrays in the expression must have their final dimension increasing in memory with no stride, and (4) none of the arrays are active. On the GNU compiler the \code{-march=native} selects the best available instruction set, but you can select a specific set with \code{-msse2}, \code{-mavx} or \code{-mavx512f}. With the SSE2 and NEON instruction sets, 2 \code{double}s or 4 \code{float}s are operated on at once, for AVX these rise to 4 and 8 respectively, and for AVX512 they rise to 8 and 16 respectively. \item By default the Jacobian functions are compiled to process a strip of rows or columns of the Jacobian matrix at once. The optimum width of the strip depends on your platform, and you may wish to change it. To make the Jacobian functions process \textit{n} rows or columns at once, recompile the \Adept\ library with \code{-DADEPT\_MULTIPASS\_SIZE=}\textit{n}. \item If you suspect memory usage is a problem, you may investigate the memory used by \Adept\ by simply sending your \code{Stack} object to a stream, e.g. ``\code{std::cout \textless\textless\ stack}''. You may also use the \code{memory()} member function, which returns the total number of bytes used. Further details of similar functions are given in section \ref{sec:stack}. \end{itemize} \section{Exceptions thrown by the \Adept\ library} \label{sec:exceptions} Some functions in the \Adept\ library can throw exceptions, and the exceptions that can be thrown are typically derived from either \code{adept::autodiff\_exception} or \code{adept::array\_exception}. 
These classes are derived from \code{adept::exception}, which is itself derived from \code{std::exception}. Most indicate an error in the user's code, usually associated with calling \Adept\ functions in the wrong order. An overly comprehensive exception-catching implementation that takes different actions depending on whether a specific \Adept\ exception, an exception related to automatic differentiation, a general \Adept\ exception, or a non-\Adept\ exception is thrown, could have the following form: % \begin{lstlisting} try { adept::Stack stack; // ... Code using the Adept library goes here ... } catch (adept::stack_already_active& e) { // Catch a specific Adept exception std::cerr << "Error: " << e.what() << std::endl; // ... any further actions go here ... } catch (adept::autodiff_exception& e) { // Catch any Adept exception related to automatic differentiation not yet caught std::cerr << "Error: " << e.what() << std::endl; // ... any further actions go here ... } catch (adept::exception& e) { // Catch any other Adept exception not yet caught std::cerr << "Error: " << e.what() << std::endl; // ... any further actions go here ... } catch (...) { // Catch any exceptions not yet caught std::cerr << "An error occurred" << std::endl; // ... any further actions go here ... } \end{lstlisting} % All exceptions implement the \code{what()} member function, which returns a \code{const char*} containing an error message. \subsection{General exceptions} The following exceptions are not specific to arrays or automatic differentiation and inherit directly from \code{adept::exception}: \begin{description} \citem{feature\_not\_available} This exception is thrown by deprecated functions, such as \code{Stack::start()}. 
It is also thrown by functions that are not available because a certain library is not being used, such as \code{inv} if \Adept\ was compiled without LAPACK support, or matrix multiplication via the `\code{**}' pseudo-operator if \Adept\ was compiled without BLAS support. \end{description} \subsection{Automatic-differentiation exceptions} The following exceptions relate to automatic differentiation (the functionality described in chapter \ref{chap:ad}), and all are in the \code{adept} namespace: \begin{description} \citem{gradient\_out\_of\_range} This exception can be thrown by the \code{adouble::get\_gradient} member function if the index to its gradient is larger than the number of gradients stored. This can happen if the \code{adouble} object was created after the first \code{adouble::set\_gradient} call since the last \code{Stack::new\_recording} call. The first \code{adouble::set\_gradient} call signals to the \Adept\ stack that the main algorithm has completed and so memory can be allocated to store the gradients ready for a forward or reverse pass through the differential statements. If further \code{adouble} objects are created then they may have a gradient index that is out of range of the memory allocated. % \citem{gradients\_not\_initialized} This exception can be thrown by functions that require the list of working gradients to have been initialized (particularly the functions \code{Stack::compute\_tangent\_linear} and \code{Stack::compute\_adjoint}). This initialization occurs when \code{adouble::set\_gradient} is called. % \citem{stack\_already\_active} This exception is thrown when an attempt is made to make a particular \code{Stack} object ``active'', but there already is an active stack in this thread. This can be thrown by the \code{Stack} constructor or the \code{Stack::activate} member function. 
% \citem{dependents\_or\_independents\_not\_identified} This exception is thrown when an attempt is made to compute a Jacobian but the independents and/or dependents have not been identified. % \citem{wrong\_gradient} This exception is thrown by the \code{adouble::append\_derivative\_dependence} if the \code{adouble} object that it is called from is not the same as that of the most recent \code{adouble::add\_derivative\_dependence}. % \citem{non\_finite\_gradient} This exception is thrown if the user's code is compiled with the preprocessor variable \code{ADEPT\_TRACK\_NON\_FINITE\_GRADIENTS} defined, and a mathematical operation is carried out for which the derivative is not finite. This is useful to locate the source of non-finite derivatives coming out of an algorithm. \end{description} \subsection{Array exceptions} \label{sec:array_exceptions} The following exceptions relate to arrays (the functionality described in chapter \ref{chap:arrays}), and all are in the \code{adept} namespace: \begin{description} \citem{size\_mismatch} A mathematical operation taking two arguments has been applied to array expressions that are not of the same size. The same exception is thrown if an array expression is applied to an array of a different size. \citem{inner\_dimension\_mismatch} Matrix multiplication has been attempted with arrays whose inner dimensions don't agree. \citem{empty\_array} An empty array has been used in an operation when a non-empty array is required; for example, if an attempt is made to link an array to an empty array (see section \ref{sec:array} for more information on linking). \citem{invalid\_dimension} Attempt to create an array with a negative dimension. \citem{index\_out\_of\_bounds} An element or range of elements has been requested from an array but one of the indices provided is out of range; for a dimension of length $n$, the index is not in the range $0$ to $n-1$. 
Note that bounds checking is only applied if the preprocessor variable \code{ADEPT\_BOUNDS\_CHECKING} is defined. %\citem{invalid\_lvalue} \citem{invalid\_operation} An invalid operation has been performed that can only be detected at run-time, for example, calling the \code{diag\_submatrix} member function of a non-square rank-2 \code{Array}. \citem{matrix\_ill\_conditioned} An attempt has been made to factorize an ill-conditioned matrix (either via \code{solve} or \code{inv}). \citem{fortran\_interoperability\_error} An attempt has been made to associate an \Adept\ \code{Array} with a \code{FortranArray} of the wrong rank or type. \end{description} \section{Configuring the behaviour of \Adept} \label{sec:configuring} The behaviour of the \Adept\ library can be changed by defining one or more of the \Adept\ preprocessor variables. This can be done either by editing the \code{adept/base.h} file and uncommenting the relevant \code{\#define} lines, or by compiling your code with \code{-Dxxx} compiler options (replacing \code{xxx} by the relevant preprocessor variable). There are two types of preprocessor variable: the first types only apply to the compilation of user code, while the second types require the \Adept\ library to be recompiled. \subsection{Modifications not requiring a library recompile} \label{sec:configuring_no_lib} The preprocessor variables that apply only to user code and do not require the \Adept\ library to be recompiled are as follows: \begin{description} \citem{ADEPT\_STACK\_THREAD\_UNSAFE} If this variable is defined, the currently active stack is stored as a global variable but is not defined to be ``thread-local''. This is slightly faster, but means that you cannot use multi-threaded code with separate threads holding their own active \code{Stack} object. Note that although defining this variable does not require a library recompile, all source files that make up a single executable must be compiled with this option (or all not be). 
% \citem{ADEPT\_RECORDING\_PAUSABLE} This option enables an algorithm to be run both with and without automatic differentiation from within the same program via the functions \code{Stack::pause\_recording()} and \code{Stack::continue\_recording()}. Note that although defining this variable does not require a library recompile, all source files that make up a single executable must be compiled with this option (or all not be). Further details on this option are provided in section \ref{sec:pausable}. % \citem{ADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} This option turns off automatic differentiation by treating \code{adouble} objects as \code{double}. It is useful if you want to compile one source file twice to produce versions with and without automatic differentiation. Further details on this option are provided in section \ref{sec:multipleobjects}. % \citem{ADEPT\_TRACK\_NON\_FINITE\_GRADIENTS} Often when an algorithm is first converted to use an operator-overloading automatic differentiation library, the gradients come out as Not-a-Number or Infinity. The reason is often that the algorithm contains operations for which the derivative is not finite (e.g.\ $\sqrt{a}$ for $a=0$), or constructions where a non-finite value is produced but subsequently made finite (e.g.\ $\exp(-1.0/a)$ for $a=0$). Usually the algorithm can be recoded to avoid these problems, if the location of the problematic operations can be identified. By defining this preprocessor variable, a \code{non\_finite\_gradient} exception will be thrown if any operation results in a non-finite derivative. Running the program within a debugger (and ensuring that the exception is not caught within the program) enables the offending line to be identified. % \citem{ADEPT\_INITIAL\_STACK\_LENGTH} This preprocessor variable is set to an integer, and is used as the default initial amount of memory allocated for the recording, in terms of the number of statements and operations. 
% \citem{ADEPT\_REMOVE\_NULL\_STATEMENTS} If many variables in your code are likely to be zero then redundant operations will be added to the list of differential statements. For example, the assignment $a=b\times c$ with active variables $b$ and $c$ both being zero results in the differential statement $\delta a=0\times\delta b+0\times\delta c$. This preprocessor variable checks for zeros and removes terms on the right-hand-side of differential statements if it finds them. In this case it would put $\delta a=0$ on the stack instead. This option slows down the recording stage, but speeds up the subsequent use of the recorded stack for adjoint and Jacobian calculations. The speed up of the latter is only likely to exceed the slow down of the former if your code contains many zeros. For most codes, this option causes a net slow down. % \citem{ADEPT\_COPY\_CONSTRUCTOR\_ONLY\_ON\_RETURN\_FROM\_FUNCTION} In \Adept\ 1.1 this enabled a small but unsafe optimization. It now has no effect. % \citem{ADEPT\_BOUNDS\_CHECKING} If this variable is defined, check that all array indices are within the bounds of the array throwing an \code{index\_out\_of\_bounds} exception if necessary. If this variable is not defined then these checks are not performed, which is faster but means that attempts to access arrays out of bounds will result in either corruption of other memory used by the process, or a segmentation fault. \citem{ADEPT\_NO\_ALIAS\_CHECKING} This variable turns off alias checking, which results in faster code, but may lead to unexpected results if the right-hand-side of an array statement shares data with the left-hand-side of the expression. If this is likely for a particular statement then use the \code{eval} function, described in section \ref{sec:bounds}. \citem{ADEPT\_NO\_DIMENSION\_CHECKING} This variable turns off checking the dimensions match when an array expression is assigned to another array. 
\citem{ADEPT\_STORAGE\_THREAD\_SAFE} This variable ensures that accesses to the reference counter in \code{Storage} objects are atomic, enabling the \code{Array} and \code{SpecialMatrix} objects that use them to be accessed safely in a multi-threaded environment. Note that this may incur a performance penalty, and is only available in C++11. See section \ref{sec:thread}. \citem{ADEPT\_INIT\_REAL\_SNAN} To detect errors caused by use of uninitialized data, initialize floating point arrays and active scalars with signaling NaNs. This is typically accompanied by directing the program to fail with a floating-point exception if a NaN is used in an expression, achieved by adding the following to one of the program source files: \begin{lstlisting} #include <fenv.h> int _feenableexcept_status = feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW); \end{lstlisting} It should then be possible to use a debugger to identify the array that was read before being initialized with real numbers. \end{description} \subsection{Modifications requiring a library recompile} \label{sec:configuring_lib} \noindent The preprocessor variables that require the \Adept\ library to be recompiled are as follows. Note that if these variables are used they must be the same when compiling both the library and the user code. This is safest to implement by editing section 2 of the \code{adept/base.h} header file. \begin{description} \citem{ADEPT\_REAL\_TYPE\_SIZE} If you want to compile \Adept\ to use a precision other than double for the \code{Real} type, and hence for automatic differentiation, then define this preprocessor variable to be \code{4} (for \code{float}), \code{8} (for \code{double}) or \code{16} (for \code{long double}). This will also change the default floating-point type for arrays, including shortcuts such as \code{Vector}, \code{Matrix}, \code{SymmMatrix}. 
Note that if you specify \code{16} but your compiler cannot support it (i.e.\ \code{sizeof(long double)==8}) then \Adept\ would produce sub-optimal code so will fail to compile. % \citem{ADEPT\_STACK\_STORAGE\_STL} Use the C++ standard template library \code{vector} or \code{valarray} classes for storing the recording and the list of gradients, rather than dynamically allocated arrays. In practice, this tends to slow down the code. % \citem{ADEPT\_MULTIPASS\_SIZE} This is set to an integer, invariably a power of two, specifying the number of rows or columns of a Jacobian that are calculated at once. The optimum value depends on the platform and the capability of the compiler to optimize loops whose length is known at compile time. % \citem{ADEPT\_MULTIPASS\_SIZE\_ZERO\_CHECK} This is also set to an integer; if it is greater than \codebf{ADEPT\_MULTIPASS\_SIZE}, then the \code{Stack::jacobian\_reverse} function checks gradients are non-zero before using them in a multiplication. % \citem{ADEPT\_THREAD\_LOCAL} This can be used to specify the way that thread-local storage is declared by your compiler. Thread-local storage is used to ensure that the \Adept\ library is thread-safe. By default this variable is not defined initially, and then later in \code{adept/base.h} it is set to an appropriate value on your system: \code{thread\_local} if you compile with the C++11 standard, otherwise \code{\_\_declspec(thread)} on Microsoft Visual C++, an empty declaration on Mac (since thread-local storage is not available on many Mac platforms) and \code{\_\_thread} otherwise (appropriate for at least the GNU, Intel, Sun and IBM compilers). To override the default behaviour, define this variable yourself in \code{adept/base.h}. \end{description} \section{Frequently asked questions} \label{sec:faq} \begin{description} \item[Why are all the gradients coming out of the automatic differentiation zero?] 
You have almost certainly omitted or misplaced the call of the \code{adept::Stack} member function ``\code{new\_recording()}''. It should be placed \emph{after} the independent variables in the algorithm have been initialized, but before any subsequent calculations are performed on these variables. If it is omitted or placed before the point where the independent variables are initialized, the differential statements corresponding to this initialization (which are all of the form $\delta x=0$), will be placed in the list of differential statements and will unhelpfully set to zero all your gradients right at the start of a forward pass (resulting from a call to \code{forward()}) or set them to zero right at the end of a reverse pass (resulting from a call to \code{reverse()}). \item[Why are the gradients coming out of the automatic differentiation NaN or Inf (even though the value is correct)?] This can occur if the algorithm contains operations for which the derivative is not finite (e.g.\ $\sqrt{a}$ for $a=0$), or constructions where a non-finite value is produced but subsequently made finite (e.g.\ $\exp(-1.0/a)$ for $a=0$). Usually the algorithm can be recoded to avoid these problems, if the location of the problematic operations can be identified. The simplest way to locate the offending statement is to recompile your code with the \code{-g} option and the \code{ADEPT\_TRACK\_NON\_FINITE\_GRADIENTS} preprocessor variable set (see section \ref{sec:configuring_no_lib}). Run the program within a debugger and a \code{non\_finite\_gradient} exception will be thrown, which if not caught within the program will enable you to locate the line in your code where the problem originated. You may need to turn optimizations off (compile with \code{-O0}) for the line identification to be accurate. You can also turn on trapping of floating-point exceptions, as explained in the description of the \code{ADEPT\_INIT\_REAL\_SNAN} option in section \ref{sec:configuring_no_lib}. 
\item[Why are the gradients coming out of the automatic differentiation wrong?] Before suspecting a bug in \Adept, note that round-off error can lead to incorrect gradients even in hand-coded differential code. Consider the following: \begin{lstlisting} int main() { Stack stack; adouble a = 1.0e-26, b; stack.new_recording(); b = sin(a) / a; b.set_gradient(1.0); stack.compute_adjoint(); std::cout << "a=" << a << ", b=" << b << ", db/da=" << a.get_gradient() << "\n"; } \end{lstlisting} We know that near \code{a=0} we should have \code{b=1} and the gradient should be \code{0}. But running the program above will give a gradient of \code{1.71799e+10}. If you hand-code the gradient, i.e. \begin{lstlisting} double A = 1.0e-26; double dB_dA = cos(A)/A - sin(A) / (A*A); \end{lstlisting} you will also get the wrong gradient. You can see that the answer is the difference of two very large numbers and so subject to round-off error. This example is therefore not a bug of \Adept, but a limitation of finite-precision machines. To check this, try compiling your code using either the ADOL-C or CppAD automatic differentiation tools; I have always found these tools to give exactly the same gradient as \Adept. Unfortunately, round-off error can build up over many operations to give the wrong result, so there may not be a simple solution in your case. \item[Can \Adept\ reuse a stored tape for multiple runs of the same algorithm but with different inputs?] No. \Adept\ does not store the full algorithm in its stack (as ADOL-C does in its tapes, for example), only the derivative information. So from the stack alone you cannot rerun the function with different inputs. However, rerunning the algorithm including recomputing the derivative information is fast using \Adept, and is still faster than libraries that store enough information in their tapes to enable a tape to be reused with different inputs. 
It should be stressed that for any algorithm that includes different paths of execution (``if'' statements) based on the values of the inputs, such a tape would need to be rerecorded anyway. This includes any algorithm containing a look-up table. \item[Why does my code crash with a segmentation fault?] This means it is trying to access a memory address not belonging to your program, and the first thing to do is to run your program in a debugger to find out at what point in your code this occurs. If it is in the \code{adept::aReal} constructor (note that \code{aReal} is synonymous with \code{adouble}), then it is very likely that you have tried to instantiate an \code{adept::adouble} object before instantiating an \code{adept::Stack} object. As described in section \ref{sec:stack_setup}, there are good reasons why you need to initialize the \code{adept::Stack} object first. \item[How can I interface \Adept\ with a matrix library such as Eigen?] Unfortunately the use of expression templates in \Adept\ means that it does not work optimally (if it works at all) with third-party matrix libraries that use expression templates. This is the reason why Adept 2.0 combines array functionality with automatic differentiation in a single expression-template framework. \item[Do you have plans to enable \Adept\ to produce Hessian matrices?] Not in the near future as this is a huge change. However, if your objective function $J(\x)$ (also known as a cost function or penalty function) has a specific form then the approximate Hessian matrix can be computed from the Jacobian matrix, as described in chapter \ref{chap:optimize}. \item[Why doesn't the ternary operator work?] Some compilers will fail to compile the following function: \begin{lstlisting} adept::adouble piecewise(adept::adouble x) { return x < 1.0 ?
x*x : 2.0*x-1.0; } \end{lstlisting}% The reason is that these compilers require that the two possible outcomes of the ternary operator have the same type, but due to the use of expression templates, the types of these mathematical expressions are actually different. The ternary operator cannot be overloaded to allow such arguments. The solution is to explicitly convert the outcomes to \code{adouble}: \begin{lstlisting} adept::adouble piecewise(adept::adouble x) { return x < 1.0 ? adept::adouble(x*x) : adept::adouble(2.0*x-1.0); } \end{lstlisting} \item[Why is my executable so huge?] Probably you are including debugging symbols by compiling with the \code{-g} option. Expression templates need long strings to describe them, so this extra content can increase the size of object files and executables by a factor of ten. This does not slow down execution, but for production code you may wish to compile without debugging symbols, or if you use the GNU compiler use instead the \code{-g1} option which stores a reduced amount of debugging information. \item[Why do I get incorrect behaviour when I use the ``\code{auto}'' keyword?] Since C++11, many programmers make widespread use of \code{auto} as the type of a local object that can be inferred from its initializer. This is problematic for most expression-template libraries, including \Adept, because expressions are not evaluated immediately. For example, dividing one \code{aReal} object by another returns an \code{adept::internal::BinaryOperation} object, and the division is only performed (and differentiated) when this object is assigned to an \code{aReal} object. The \code{auto} keyword will be interpreted as the type of the internal object, but this internal object may contain references to temporary objects that make up the other parts of the expression, and which go out of scope after the semi-colon at the end of the \code{auto} statement. 
In this example the correct behaviour is obtained by replacing \code{auto} with \code{aReal}. Never use the \code{auto} keyword when initializing an object from an \Adept\ expression. \end{description} \section{Copyright and license for \Adept\ software} \label{sec:license} Versions 1.9 of \Adept\ and later are owned and copyrighted jointly by the University of Reading and the European Centre for Medium Range Weather Forecasts. The copyright to versions 1.1 and earlier is held solely by the University of Reading. Since version 1.1, the \Adept\ library is released under the Apache License, Version 2.0, which is available at \url{http://www.apache.org/licenses/LICENSE-2.0}. In short, this free-software license permits you to use the library for any purpose, and to modify it and combine it with other software to form a larger work. If you choose, you may release the modified software in either source code or object code form, so you may use \Adept\ in both open-source software and non-free proprietary software. However, distributed versions must retain copyright notices and also distribute both the information in the NOTICES file and a copy of the Apache License. Different license terms may be applied to your distributed software, although they must include the conditions on redistribution provided in the Apache License. This is just a short summary; if in doubt, consult the text of the license. In addition to the legally binding terms of the license, it is \emph{requested} that: \begin{itemize} \item You cite \cite{Hogan2014} in publications describing algorithms and software that make use of the \Adept\ library. While not a condition of the license, this is good honest practice in science and engineering.
\item If you make modifications to the \Adept\ library that might be useful to others, you release your modifications under the terms of the Apache License, Version 2.0, so that they are available to others and could also be merged into a future official version of \Adept. If you do not state the license applied to your modifications then by default they will be under the terms of the Apache License. You will retain copyright of your modifications, but if your modifications are written in the course of employment then under almost all circumstances (including employment by a University) it is your employer who holds the copyright. Therefore you should obtain permission from them to release your modifications under the Apache License. \end{itemize} Note that other source files in the \Adept\ package used for demonstrating and benchmarking \Adept\ are released under the GNU all-permissive license\footnote{The GNU all-permissive license reads: \emph{Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty.}}, which is specified at the top of all files it applies to. \Adept\ version 1.0 was released under the terms of the GNU General Public License (GPL) and so could not be released as part of a larger work unless the entire work was released under the conditions of the GPL. It is hoped that the switch to the Apache License will facilitate wider use of \Adept. \section*{Acknowledgments} Adept 1.0 was developed by Robin Hogan at the University of Reading with funding from European Space Agency contract 40001041528/11/NL/CT. Some of the modifications to produce version 1.1 were funded by a National Centre for Earth Observation Mission Support grant (Natural Environment Research Council grant NE/H003894/1). Dr Brian Tse is thanked for his work exploring different parallelization strategies during this period. 
Subsequent development has been carried out under employment at the European Centre for Medium Range Weather Forecasts. \begin{thebibliography}{00} \markright{References} \harvarditem{Bell}{2007}{Bell2007}Bell, B., 2007: CppAD: A package for C++ algorithmic differentiation. \url{http://www.coin-or.org/CppAD} % \harvarditem{Liu and Nocedal}{1989}{Liu+1989}Liu, D. C., and Nocedal, J., 1989: On the limited memory method for large scale optimization. \emph{Math.\ Programming B,} {\bf 45,} 503--528. % \harvarditem{Gay}{2005}{Gay2005}Gay, D. M., 2005: Semiautomatic differentiation for efficient gradient computations. In \emph{Automatic Differentiation: Applications, Theory, and Implementations}, H. M. B\"ucker, G. F. Corliss, P. Hovland, U. Naumann and B. Norris (eds.), Springer, 147--158. % \harvarditem{Griewank et~al.}{1996}{Griewank+1996}Griewank, A., Juedes, D., and Utke, J., 1996: Algorithm 755: ADOL-C: a package for the automatic differentiation of algorithms written in C/C++. \textit{ACM Trans.\ Math.\ Softw.,} \textbf{22,} 131--167. \harvarditem{Hogan}{2014}{Hogan2014}Hogan, R. J., 2014: Fast reverse-mode automatic differentiation using expression templates in C++. \textit{ACM Trans.\ Math.\ Softw.,} \textbf{40,} 26:1-26:16. \harvarditem{Veldhuizen}{1995}{Veldhuizen1995}Veldhuizen, T., 1995: Expression templates. {\it C++ Report,} {\bf 7,} 26--31. 
\end{thebibliography} \end{document} ================================================ FILE: doc/adept_reference.tex ================================================ \documentclass[10pt,a4,landscape]{article} % Page set up \setlength{\oddsidemargin}{-1cm} %{0.5cm} \setlength{\evensidemargin}{-1cm} %{0.5cm} \setlength{\topmargin}{-3cm} %\setlength{\topmargin}{0cm} %\setlength{\textheight}{24cm} %\setlength{\textwidth}{16cm} \setlength{\textheight}{19cm} \setlength{\textwidth}{26cm} \setlength{\marginparsep}{0.5cm} \setlength{\marginparwidth}{0cm} %\setlength{\parindent}{1em} %\setlength{\parskip}{0.5ex} \def\myvskip{\vskip 1ex} \def\hangingpar{\parshape 2 0cm \linewidth 1ex \dimexpr\linewidth-1ex\relax} \renewcommand{\baselinestretch}{1.05} \sloppy %\usepackage{multicol} \usepackage{lmodern}\usepackage[T1]{fontenc} \usepackage{color} \usepackage[figuresright]{rotating} \DeclareFontFamily{T1}{lmttc}{\hyphenchar \font-1 } \DeclareFontShape{T1}{lmttc}{m}{n} {<-> ec-lmtlc10}{} \DeclareFontShape{T1}{lmttc}{m}{it} {<->sub*lmttc/m/sl}{} \DeclareFontShape{T1}{lmttc}{m}{sl} {<-> ec-lmtlco10}{} %\def\myfont{\fontfamily{cmss}\fontseries{lmtt}\selectfont} \def\myfont{\fontfamily{cmss}\selectfont} \def\mysize{\footnotesize} \def\mysize{\small} \def\codeindent{\hspace{\tabcolsep}} \setlength{\parindent}{0pt} \def\code#1{\texttt{#1}} \renewcommand{\rmdefault}{cmss} \begin{document} \pagestyle{empty} \twocolumn \mysize\myfont\section*{\Huge Adept Quick Reference} %\section*{General} All functions and types are placed in the \code{adept} namespace. 
\subsection*{Header files} \begin{tabular}{ll} \code{adept.h} & Include if only scalar automatic differentiation is required\\ \code{adept\_arrays.h} & Include if array capabilities are needed as well\\ \code{adept\_fortran.h} & Interface to Fortran 2018 array descriptors\\ \code{adept\_optimize.h} & Minimization algorithms, e.g.\ Levenberg-Marquardt\\ \code{adept\_source.h} & Include entire Adept library, so linking to library not required \\ \end{tabular} %\section*{Automatic differentiation functionality} \subsection*{Scalar types} \begin{tabular}{ll} \code{Real} & Passive scalar type used for differentiation (usually \code{double})\\ \code{aReal} & Active scalar of underlying type \code{Real} \\ \code{adouble}, \code{afloat} & Active scalars of underlying type \code{double} and \code{float}\\ \end{tabular} \subsection*{Basic reverse-mode workflow} \begin{tabular}{ll} \code{Stack stack;} & Object to store derivative information\\ \code{aVector x = \{1.0, 2.0\};} & Initialize independent (input) variables (C++11)\\ \code{stack.new\_recording();} & Start a new recording\\ \code{aReal J = algorithm(x);} & Any complicated algorithm here\\ \code{J.set\_gradient(1.0);} & Seed adjoint of cost function\\ \code{stack.reverse();} & Perform reverse-mode differentiation\\ \code{Vector dJ\_dx = x.get\_gradient();} & Return gradients of output with respect to inputs\\ \end{tabular} \subsection*{Basic Jacobian workflow} \begin{tabular}{ll} \code{Stack stack;} & Object to store derivative information\\ \code{aVector x = \{1.0, 2.0\};} & Initialize independent (input) variables (C++11)\\ \code{stack.new\_recording();} & Start a new recording\\ \code{aVector y = algorithm(x);} & Algorithm with vector output\\ \code{stack.independent(x);} & Declare independent variables \\ \code{stack.dependent(y);} & Declare dependent variables\\ \code{Matrix dy\_dx = stack.jacobian();} & Compute Jacobian matrix\\ \end{tabular} \subsection*{\code{aReal} member functions} The first three functions 
below also work with active array arguments, where \code{g} would be of the equivalent passive array type:\\ \begin{tabular}{ll} \code{.set\_gradient(g)} & Initialize gradient to \code{g} \\ \code{.get\_gradient()} & After forward or reverse pass, return gradient\\ \code{.get\_gradient(g)} & As above, but writing gradient to \code{g}\\ \code{.add\_derivative\_dependence(a,p)} & Add \code{p}$\times\delta$\code{a} to the stack\\ \code{.append\_derivative\_dependence(a,p)} & Append $+$\code{p}$\times\delta$\code{a} to the stack\\ \end{tabular} \subsection*{\code{Stack} member functions} Constructors:\\ \begin{tabular}{ll} \code{Stack stack;} & Construct and activate immediately \\ \code{Stack stack(false);} & Construct in inactive state\\ \end{tabular} Member functions:\\ \begin{tabular}{ll} \code{.new\_recording()} & Clear any existing differential statements\\ \code{.pause\_recording()} & Pause recording (\code{ADEPT\_RECORDING\_PAUSABLE} needed)\\ \code{.continue\_recording()} & Continue recording \\ \code{.is\_recording()} & Is Adept currently recording?\\ \code{.forward()} & Perform forward-mode differentiation\\ \code{.compute\_tangent\_linear()} & ...as above\\ \code{.reverse()} & Perform reverse-mode differentiation\\ \code{.compute\_adjoint()} & ...as above\\ \code{.independent(x)} & Declare an independent variable (active scalar or array)\\ \code{.independent(xptr,n)} & Declare \code{n} independent scalar variables starting at \code{xptr} \\ \code{.dependent(y)} & Declare a dependent variable (active scalar or array)\\ \code{.dependent(yptr,n)} & Declare \code{n} dependent scalar variables starting at \code{yptr}\\ \code{.jacobian()} & Return Jacobian matrix\\ \code{.jacobian(jacptr)} & Place Jacobian matrix into \code{jacptr} (column major)\\ \code{.jacobian(jacptr,false)} & Place Jacobian matrix into \code{jacptr} (row major)\\ \code{.clear\_gradients()} & Clear gradients set with \code{set\_gradient} function \\ \code{.clear\_independents()} & Clear
independent variables\\ \code{.clear\_dependents()} & Clear dependent variables\\ \code{.n\_independents()} & Number of independent variables declared \\ \code{.n\_dependents()} & Number of dependent variables declared\\ %\end{tabular} %\begin{tabular}{ll} \code{.print\_status()} & Print status of \code{Stack} to standard output\\ \code{.print\_statements()} & Print list of differential statements\\ \code{.print\_gradients()} & Print current values of gradients\\ \code{.activate()} & Activate the stack \\ \code{.deactivate()} & Deactivate the stack\\ \code{.is\_active()} & Is the stack currently active?\\ \code{.memory()} & Return number of bytes currently used\\ \code{.preallocate\_statements(n)} & Preallocate space for \code{n} statements\\ \code{.preallocate\_operations(n)} & Preallocate space for \code{n} operations\\ \end{tabular} \subsection*{Query functions in \code{adept} namespace} \begin{tabular}{ll} \code{active\_stack()} & Return pointer to currently active \code{Stack} object\\ \code{version()} & Return \code{std::string} with Adept version number\\ \code{configuration()} & Return \code{std::string} describing Adept configuration\\ \code{have\_matrix\_multiplication()} & Adept compiled with matrix multiplication (BLAS)?\\ \code{have\_linear\_algebra()} & Adept compiled with linear-algebra (LAPACK)?\\ \code{set\_max\_blas\_threads(n)} & Set maximum threads for matrix operations\\ \code{max\_blas\_threads()} & Get maximum threads for matrix operations\\ \code{is\_thread\_unsafe()} & Global \code{Stack} object is \textit{not} thread-local?\\ \end{tabular} \newpage %\section*{Array functionality} \subsection*{Dense dynamic array types} \begin{tabular}{ll} \code{Vector}, \code{Matrix}, \code{Array3D}, \code{Array4D}... \code{Array7D} & Arrays of type \code{Real}\\ \code{intVector}, \code{intMatrix}, \code{intArray3D}... \code{intArray7D}& Arrays of type \code{int}\\ \code{boolVector}, \code{boolMatrix}, \code{boolArray3D}... 
\code{boolArray7D}& Arrays of type \code{bool}\\ \code{floatVector}, \code{floatMatrix}, \code{floatArray3D}... \code{floatArray7D} & Arrays of type \code{float}\\ \code{aVector}, \code{aMatrix}, \code{aArray3D}... \code{aArray7D} & Active arrays of type \code{Real}\\ \end{tabular} \myvskip Define new dynamic array types as follows:\\ \begin{tabular}{l} \code{typedef Array shortMatrix;}\\ \code{typedef Array afloatArray3D;} \end{tabular} \subsection*{Dense fixed-size array types} \begin{tabular}{ll} \code{Vector2}, \code{Vector3}, \code{Vector4} & Passive vectors of fixed length 2--4\\ \code{Matrix22}, \code{Matrix33}, \code{Matrix44} & Passive matrices of fixed size 2$\times$2, 3$\times$3, 4$\times$4\\ \code{aVector2}, \code{aVector3}, \code{aVector4} & Active vectors of fixed length 2--4\\ \code{aMatrix22}, \code{aMatrix33}, \code{aMatrix44} & Active matrices of fixed size 2$\times$2, 3$\times$3, 4$\times$4\\ \end{tabular} \myvskip Define new fixed array types as follows:\\ \begin{tabular}{l} \code{typedef FixedArray shortMatrix24;}\\ \code{typedef FixedArray aArray333;} \end{tabular} \subsection*{Special square matrix types} \begin{tabular}{ll} \code{SymmMatrix}, \code{aSymmMatrix} & Symmetric matrix\\ \code{DiagMatrix}, \code{aDiagMatrix} & Diagonal matrix\\ \code{TridiagMatrix}, \code{aTridiagMatrix} & Tridiagonal matrix\\ \code{PentadiagMatrix}, \code{aPentadiagMatrix} & Pentadiagonal matrix\\ \code{LowerMatrix}, \code{aLowerMatrix} & Lower-triangular matrix\\ \code{UpperMatrix}, \code{aUpperMatrix} & Upper-triangular matrix\\ \end{tabular} \subsection*{Dense dynamic array constructors} \begin{tabular}{ll} \code{Matrix M;} & Create an empty matrix of type \code{Real}\\ \code{Matrix N(M);} & Create matrix sharing data with existing matrix\\ \code{Matrix N = M;} & ...as above\\ \code{Matrix N(3,4);} & Create matrix with size 3$\times$4\\ \code{Matrix N(dimensions(3,4));} & ...as above\\ \code{Matrix N(M.dimensions());} & Create matrix with the same size as
\code{M}\\ \code{Matrix N(ptr,dimensions(3,4));} & Create 3$\times$4 matrix sharing data from pointer \code{ptr}\\ \code{Matrix N = log(M);} & Create matrix containing copy of right-hand-side\\ \code{Matrix N = \{\{1.0,2.0\},\{3.0,4.0\}\};} & Create 2$\times$2 matrix from initializer list (C++11)\\ \end{tabular} \subsection*{Array resize and link member functions} \begin{tabular}{ll} \code{.clear()} & Return array to original empty state\\ \code{.resize(3,4)} & Resize array discarding data\\ \code{.resize(dimensions(3,4))} & ...as above\\ \code{.resize\_row\_major(3,4)} & Resize with row-major storage (default)\\ \code{.resize\_column\_major(3,4)} & Resize with column-major storage\\ \code{.resize(M.dimensions())} & Resize to same as \code{M}\\ \code{.resize\_contiguous(...)} & Resize guaranteeing contiguous storage\\ \code{N >{}>= M;} & Discard existing data and link to array on right-hand-side\\ \end{tabular} \subsection*{Array query member functions} \begin{tabular}{ll} \code{::rank} & Number of array dimensions\\ \code{.empty()} & Return \code{true} if array is empty, \code{false} otherwise\\ \code{.dimensions()} & Return an object that can be used to resize other arrays\\ \code{.dimension(i)} & Return length of dimension \code{i} (0 based)\\ \code{.size()} & Return total number of elements\\ \code{.data()} & Return pointer to underlying passive data\\ \code{.const\_data()} & Return \code{const} pointer to underlying data\\ \end{tabular} \subsection*{Array filling} \begin{tabular}{ll} \code{M = 1.0;} & Fill all elements of array with the same number\\ \code{M <{}< 1.0, 2.0, 3.0, 4.0;} & Fill first four elements of array\\ \code{M = \{\{1.0,2.0\},\{3.0,4.0\}\};} & Fill 2$\times$2 matrix (C++11)\\ \end{tabular} \subsection*{Array indexing and slicing} Dense arrays can be indexed/sliced using the function-call operator with as many arguments as there are dimensions (e.g.\ index a matrix with \code{M(i,j)}). In all cases a slice can be used as an lvalue or rvalue. 
If all arguments are scalars then a single element of the array is extracted. The following special values are available:\\ \begin{tabular}{ll} \code{end} & The last element of the dimension being indexed\\ \code{end-1} & Penultimate element of indexed dimension (any integer arithmetic possible)\\ \end{tabular} If one or more of the arguments is a \textit{regular index range} then the return type will be an \code{Array} pointing to part of the original array. For every scalar argument, its rank will be reduced by one compared to the original array. The available ranges are:\\ \begin{tabular}{ll} \code{\_\_} & All elements of indexed dimension \\ \code{range(ibeg,iend)} & Contiguous range from \code{ibeg} to \code{iend}\\ \code{stride(ibeg,iend,istride)} & Strided range (\code{istride} can be negative but not zero)\\ \end{tabular} If any of the arguments is an \textit{irregular index range} (such as an \code{intVector} containing an arbitrary list of indices) then the return type will be an \code{IndexedArray}. If used as an lvalue, it will modify the original array, but if passed into a function receiving an \code{Array} type then any modifications inside the function will not affect the original array. \subsection*{Passing arrays to and from functions} There are three ways an array can be received as an argument to a function:\\ \begin{tabular}{ll} \code{Matrix\&} & For an array that might be resized in the function\\ \code{Matrix} & For an array or array slice to be modified inside the function\\ \code{const Matrix\&} & For a read-only array, array slice or array expression\\ \end{tabular} \subsection*{Member functions returning lvalue} The functions in this section return an \code{Array} that links to the original data and can be used on the left- or right-hand-side of an assignment.
The following only work on dynamic or fixed-size dense arrays:\\ \begin{tabular}{ll} \code{.subset(ibeg0,iend0,ibeg1,iend1,...)} & Contiguous subset\\ \code{.permute(i0,i1,...)} & Permute dimensions\\ \code{.diag\_matrix()} & For vector, return \code{DiagMatrix}\\ \code{.soft\_link()} \\ \end{tabular} The following works on any matrix:\\ \begin{tabular}{ll} \code{.T()} & Transpose of matrix\\ \end{tabular} The following work only with square matrices, including special square matrices\\ \begin{tabular}{ll} \code{.diag\_vector()} & Return vector linked to its diagonals\\ \code{.diag\_vector(i)} & Return vector linked to offdiagonal \code{i}\\ \code{.submatrix\_on\_diagonal(ibeg,iend)} & Return square matrix lying on diagonal\\ \end{tabular} \subsection*{Elemental mathematical functions} Return passive part of active object: \code{value(x)} \hangingpar Binary operators: \code{+}, \code{-}, \code{*} and \code{/}. \hangingpar Assignment operators: \code{+=}, \code{-=}, \code{*=} and \code{/=}. \hangingpar Unary functions: \code{sqrt}, \code{exp}, \code{log}, \code{log10}, \code{sin}, \code{cos}, \code{tan}, \code{asin}, \code{acos}, \code{atan}, \code{sinh}, \code{cosh}, \code{tanh}, \code{abs}, \code{asinh}, \code{acosh}, \code{atanh}, \code{expm1}, \code{log1p}, \code{cbrt}, \code{erf}, \code{erfc}, \code{exp2}, \code{log2}, \code{round}, \code{trunc}, \code{rint}, \code{nearbyint} and \code{fastexp}. \hangingpar Binary functions: \code{pow}, \code{atan2}, \code{min}, \code{max}, \code{fmin} and \code{fmax}. \hangingpar Unary functions returning \code{bool} expressions: \code{isfinite}, \code{isinf} and \code{isnan}. \hangingpar Binary operators returning \code{bool} expressions: \code{==}, \code{!=}, \code{>}, \code{<}, \code{>=} and \code{<=}. 
\subsection*{Alias-related functions} \begin{tabular}{ll} \code{eval(E)} & Avoid aliasing by evaluating expression \code{E} into an array\\ \code{noalias(E)} & Turn off alias checking for expression \code{E}\\ \end{tabular} \subsection*{Reduction functions} \begin{tabular}{ll} \code{sum(M)} & Return the sum of all elements in \code{M}\\ \code{sum(M,i)} & Return array of rank one less than \code{M} containing sum along \code{i}th dimension (0 based)\\ \end{tabular} \hangingpar Other reduction functions working in the same way: \code{mean}, \code{product}, \code{minval}, \code{maxval}, \code{norm2}. \begin{tabular}{ll} \code{dot\_product(x,y)} & The same as \code{sum(x*y)} for rank-1 arguments\\ \end{tabular} \subsection*{Expansion functions} \begin{tabular}{ll} \code{spread<d>(M,n)} & Replicate \code{M} array expression \code{n} times along dimension \code{d}\\ \code{outer\_product(x,y)} & Return rank-2 outer product from two rank-1 arguments\\ \end{tabular} \subsection*{Matrix multiplication and linear algebra} \begin{tabular}{ll} \code{transpose(M)} & Transpose matrix or 2D matrix expression\\ \code{matmul(M,N)} & Matrix multiply, where at least one argument must be a matrix, and \\ &orientation of any vector arguments is inferred\\ \code{M ** N} & Shortcut for \code{matmul}; precedence is the same as normal multiply\\ \code{inv(M)} & Inverse of square matrix\\ \code{solve(A,x)} & Solve system of linear equations\\ \end{tabular} \subsection*{Preprocessor variables} The following can be defined to change the behaviour of your code:\\ \begin{tabular}{ll} \code{ADEPT\_STACK\_THREAD\_UNSAFE} & Thread-unsafe \code{Stack} (faster)\\ \code{ADEPT\_RECORDING\_PAUSABLE} & Recording can be paused (slower)\\ \code{ADEPT\_NO\_AUTOMATIC\_DIFFERENTIATION} & Turn off differentiation\\ \code{ADEPT\_TRACK\_NON\_FINITE\_GRADIENTS} & Exception thrown if derivative non-finite\\ \code{ADEPT\_BOUNDS\_CHECKING} & Check array bounds (slower)\\ \code{ADEPT\_NO\_ALIAS\_CHECKING} & Turn off
alias checking (faster)\\ \code{ADEPT\_NO\_DIMENSION\_CHECKING} & Turn off dimension checking (faster)\\ \code{ADEPT\_INIT\_REAL\_SNAN} & Initialize real numbers to signaling NaN\\ \code{ADEPT\_INIT\_REAL\_ZERO} & Initialize real numbers to zero\\ \code{ADEPT\_FAST\_EXPONENTIAL} & Use faster vectorizable exponential\\ \code{ADEPT\_FAST\_SCALAR\_EXPONENTIAL} & Provide faster \code{adept::exp} for scalars\\ \code{ADEPT\_FAST} & Enable bit-reproducible options\\ \code{ADEPT\_STORAGE\_THREAD\_SAFE} & Thread-safe array storage (slower)\\ \code{ADEPT\_SUPPORT\_HUGE\_ARRAYS} & Use \code{std::size\_t} for array dimensions\\ \code{ADEPT\_REAL\_TYPE\_SIZE} & Size of \code{Real}: 4 or 8 (default 8) \end{tabular} The \code{ADEPT\_VERSION} variable contains version number as an integer, e.g.\ \code{20108}, while \code{ADEPT\_VERSION\_STR} contains it as a string, e.g.\ ``2.0.8''. \onecolumn \newpage \def\Y{\textbf{Y}} \def\r#1{\rotatebox{90}{#1}} \setlength{\topmargin}{-3cm} \begin{table}[tb!] %\caption{ \begin{center} %\parbox{0.9\columnwidth}{ \mysize\myfont Comparison of array syntax between Fortran 90 (and later), Matlab and the C++ libraries Adept and Eigen %In these examples, \code{v} and \code{w} are vectors % and \code{A} and \code{B} are matrices. 
%} \footnotesize \myfont \begin{tabular}{lllll} \hline {\large\phantom{X}} & \mysize Fortran 90+ & \mysize Matlab & \mysize C++ Adept (with C++11 features) & \mysize C++ Eigen \\ \hline Maximum dimensions & 7 (15 from Fortran 2008) & Unlimited & 7 & 2 \\ \hline Vector declaration & \code{real,dimension(:)} & & \code{Vector} & \code{VectorXd} \\ Matrix declaration & \code{real,dimension(:,:)} & & \code{Matrix} & \code{MatrixXd, ArrayXd} \\ 3D array declaration & \code{real,dimension(:,:,:)}& & \code{Array3D} \\ Fixed matrix declaration & \code{real,dimension(M,N)} & & \code{FixedMatrix} & \code{Matrix} \\ Diagonal matrix declaration& & & \code{DiagMatrix} & \code{DiagonalMatrix} \\ %Tridiagonal matrix & %& %& %\code{TridiagMatrix} & %\\ Symmetric matrix decl.& & & \code{SymmMatrix} \\ %Upper-triangular matrix & %& %& %\code{UpperMatrix} & %\\ Sparse matrix declaration& & %\code{sparse(A)} & & \code{SparseMatrix} \\ \hline Get rank & \code{rank(A)} & \code{ndims(A)} & \code{A::rank} \\ Get total size & \code{size(A)} & \code{numel(A)} & \code{A.size()} & \code{A.size()} \\ Get size of dimension & \code{size(A,i)} & \code{size(A,i)} & \code{A.size(i)} & \code{A.rows()}, \code{A.cols()} \\ Get all dimensions & \code{shape(A)} & \code{size(A)} & \code{A.dimensions()} \\ \hline Resize & \code{allocate(A(m,n))} & \code{A = zeros(m,n)} & \code{A.resize(m,n)} & \code{A.resize(m,n)} \\ Clear & \code{deallocate(A)} & \code{A = []} & \code{A.clear()} & \code{A.resize(0,0)} \\ Link/associate & \code{A => B} & & \code{A >{}>= B} & %Low-level access via \code{Map} (Complicated) \\ \hline Set elements to constant & \code{A = x} & \code{A(:) = x} & \code{A = x} & \code{A.fill(x)} \\ Fill vector with data & \code{v = [0,1]} & \code{v = [0,1]} & \code{v <{}< 0,1} & \code{v <{}< 0,1} \\ Fill matrix with data & \code{A=reshape([0,1,2,3],[2,2])} & \code{A = [1 2; 3 4]} & \code{A <{}< 1,2,3,4} or \code{A = \{\{1,2\},\{3,4\}\}} & \code{A <{}< 1,2,3,4} \\ Vector literal & \code{[1.0, 2.0]} 
& \code{[1.0 2.0]} & \code{Vector\{1.0, 2.0\}} & \\ \hline Vector subset & \code{v(i1:i2)} & \code{v(i1:i2)} & \code{v.subset(i1,i2)} & \code{v.segment(i1,m)} %\code{Map w(v.data()+1,8)} \\ Strided indexing & \code{v(i1:i2:s)} & \code{v(i1:s:i2)} & \code{v(stride(i1,i2,s))} & %\code{Map > w(v.data()+1,4,InnerStride<2>)} (Complicated) \\ Vector end indexing & \code{v(i:)} & \code{v(i:end)} & \code{v.subset(i,end)} & \code{v.tail(n)} \\ Index relative to end & & \code{v(end-1)} & \code{v(end-1)} & \\ Index by int vector & \code{v(index)} & \code{v(index)} & \code{v(index)} \\ \hline Matrix subset & \code{A(i1:i2,j1:j2)} & \code{A(i1:i2,j1:j2)} & \code{A.subset(i1,i2,j1,j2)} & \code{A.block(i1,j1,m,n)} \\ Extract row & \code{A(i,:)} & \code{A(i,:)} & \code{A(i,\_\_)}, \code{A[i]} & \code{A.row(i)} \\ Matrix end block & \code{M(i:,j:)} & \code{M(i:end,j:end)} & \code{M.subset(i,end,j,end)} & \code{M.bottomRightCorner(m,n)} \\ Diagonal matrix from vector & & \code{diag(v)} & \code{v.diag\_matrix()} & \code{v.asDiagonal()} \\ Matrix diagonals as vector & & \code{diag(A)} & \code{A.diag\_vector()} & \code{A.diagonal()} \\ Matrix off-diagonals & & \code{diag(A,i)} & \code{A.diag\_vector(i)} & \code{A.diagonal(i)} %\\ %Symmetric view & %& %& %\code{%\color{red} %A.symm\_matrix() %}& %\code{A.selfAdjointView()} %\\ %Upper-triangular view & %& %& %\code{\color{red}A.upper\_matrix()} & %\code{A.triangularView()} \\ \hline Elementwise multiplication & \code{A * B} & \code{A .* B} & \code{A * B} & \code{A.array() * B.array()} \\ Elemental function & \code{sqrt(A)} & \code{sqrt(A)} & \code{sqrt(A)} & \code{A.array().sqrt()} \\ Addition assignment & \code{A = A + B} & \code{A = A + B} & \code{A += B} & \code{A.array() += B} \\ Power & \code{A ** B} & \code{A .\textasciicircum\ B} & \code{pow(A,B)} & \code{A.array().pow(B)} \\ \hline Matrix multiplication & \code{matmul(A,B)} & \code{A * B} & \code{A ** B} & \code{A * B} \\ Dot product & \code{dot\_product(v,w)} & \code{dot(v,w)} &
\code{dot\_product(v,w)} & \code{v.dot(w)} \\ Matrix transpose & \code{transpose(A)} & \code{A'} & \code{A.T()} & \code{A.transpose()} \\ In-place transpose & & & \code{A.in\_place\_transpose()} & \code{A.transposeInPlace()} \\ Matrix solve & & \code{A \textbackslash\ b} & \code{solve(A,b)} & \code{A.colPivHouseholderQr().solve(b)} \\ Matrix inverse & & \code{inv(A)} & \code{inv(A)} & \code{A.inverse()} \\ \hline ``Find'' conditional assign & & \code{v(find(w<0)) = 0} & \code{v(find(w<0)) = 0} \\ ``Where'' conditional assign & \code{where(w<0) v = 0} & & \code{v.where(w<0) = 0} & \code{v = (w<0).select(0,v)} \\ ``Where'' with both cases & \code{...elsewhere v = 1} & & \code{v.where(w<0)=either\_or(0,1)} & \code{v = (w<0).select(0,1)} \\ \hline Average all elements & \code{mean(A)} & \code{mean(A(:))} & \code{mean(A)} & \code{A.mean()} \\ Average along dimension & \code{mean(A,i)} & \code{mean(A,i)} & \code{mean(A,i)} & \code{A.colwise().mean()} \\ Maximum of all elements & \code{maxval(A)} & \code{max(A(:))} & \code{maxval(A)} & \code{A.maxCoeff()} \\ Maximum of two arrays & \code{max(A,B)} & (Complicated) & \code{max(A,B)}, \code{fmax(A,B)} & \code{A.max(B)} \\ Spread along new dimension & \code{spread(A,dim,n)} & & \code{spread(A,n)} \\ \hline \end{tabular} \end{center} \end{table} \end{document} ================================================ FILE: include/Makefile.am ================================================ include_HEADERS = adept.h adept_arrays.h adept_optimize.h adept_source.h adept_fortran.h pkginclude_HEADERS = adept/Active.h adept/ActiveReference.h adept/Allocator.h \ adept/Array.h adept/Expression.h adept/ExpressionSize.h \ adept/IndexedArray.h adept/matmul.h adept/RangeIndex.h \ adept/ScratchVector.h adept/SpecialMatrix.h adept/Stack.h \ adept/StackStorage.h adept/StackStorageOrig.h \ adept/StackStorageOrigStl.h adept/Statement.h adept/Storage.h \ adept/array_shortcuts.h adept/base.h adept/reduce.h \ adept/contiguous_matrix.h adept/exception.h
adept/settings.h \ adept/interp.h adept/ActiveConstReference.h adept/cppblas.h \ adept/scalar_shortcuts.h adept/solve.h adept/traits.h adept/where.h \ adept/vector_utilities.h adept/FixedArray.h adept/Packet.h \ adept/UnaryOperation.h adept/BinaryOperation.h adept/ArrayWrapper.h \ adept/outer_product.h adept/spread.h adept/inv.h adept/eval.h \ adept/noalias.h adept/store_transpose.h adept/quick_e.h \ adept/GradientIndex.h adept/Optimizable.h adept/Minimizer.h EXTRA_DIST = Timer.h create_adept_source_header adept_source.h: @top_srcdir@/adept/*.h @top_srcdir@/adept/*.cpp @srcdir@/create_adept_source_header @srcdir@/create_adept_source_header all-local: adept_source.h ================================================ FILE: include/Timer.h ================================================ /* Timer.h - Utility class for timing different parts of a program Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #ifndef Timer_H #define Timer_H 1 #ifdef _WIN32 #include #include #else #include #endif #include #include #include #include #include // The Timer class: all functions are inline class Timer { public: typedef int TimerInt; // Constructor can specify a number of unnamed activities Timer(TimerInt n_activities = 0) : current_activity_(-1), timer_on_(false), print_on_exit_(false) { #ifdef _WIN32 win_last_time_.QuadPart = 0; #else last_time_.tv_sec = 0; last_time_.tv_usec = 0; #endif timings_.reserve(100); names_.reserve(100); for (TimerInt i = 0; i < n_activities; i++) { std::stringstream s; s << "Activity " << i; timings_.push_back(0.0); names_.push_back(s.str()); } } // When the timer is destructed (typically at program exit), print // out the times spent in each activity ~Timer() { if (print_on_exit_) { print(); } } // Print to standard error the time spent in each activity, // followed by the total over all activities void print() { double sum = 0.0; std::cerr << size() << " activities:\n"; for (TimerInt i = 0; i < size(); i++) { std::cerr.width(10); std::cerr << std::right << timings_[i] << " s: " << names_[i] << "\n"; sum += timings_[i]; } std::cerr.width(10); std::cerr << std::right << sum << " s: Total\n"; } // Register a new activity with the specified name, returning the // tag to be used to specify it in future, as a TimerInt TimerInt new_activity(const std::string& name) { TimerInt tag = size(); names_.push_back(name); timings_.push_back(0.0); return tag; } // Stop timing the current activity, adding the time elapsed since // the last split to its total; does nothing if the timer is off void stop() { if (timer_on_) { timings_[current_activity_] += split_(); } timer_on_ = false; }; // Start timing the specified activity; if another activity is // currently being timed, the time elapsed so far is first added // to that activity's total void start(TimerInt activity) { if (timer_on_) { timings_[current_activity_] += split_(); } else { split_(); } if (activity >= 0 && activity < size()) { current_activity_ = activity; timer_on_ = true; } else { // Activity out of range - to keep this inline function fast we // don't throw an exception but just don't record the time for // this event timer_on_ = false; } }; // Set the timing for a 
specific activity back to zero void reset(TimerInt activity) { if (activity >= 0 && activity < size()) { timings_[activity] = 0.0; } } // Return the list of timings in seconds as a constant reference to // a vector of doubles const std::vector& timings() { return timings_; } // Return a single timing in seconds, or 0.0 if the activity tag is // out of range double timing(TimerInt activity) { if (activity >= 0 && activity < size()) { return timings_[activity]; } else { return 0.0; } } // Return the number of registered activities, converted from // size_t to TimerInt TimerInt size() { return timings_.size(); } // Decide whether the contents of the timer class will be printed // when it is destructed void print_on_exit(bool b = true) { print_on_exit_ = b; } private: // Return the time in seconds elapsed since the previous call and // reset the reference time; uses gettimeofday, or // QueryPerformanceCounter when _WIN32 is defined double split_() { #ifdef _WIN32 using namespace std; QueryPerformanceFrequency(&frequency); QueryPerformanceCounter(&win_time_); double dsec = (double) (win_time_.QuadPart - win_last_time_.QuadPart) / (double) frequency.QuadPart; win_last_time_ = win_time_; return dsec; #else struct timeval time; gettimeofday(&time, NULL); double dsec = time.tv_sec - last_time_.tv_sec + 0.000001 * (time.tv_usec - last_time_.tv_usec); last_time_ = time; return dsec; #endif } // Data std::vector timings_; std::vector names_; TimerInt current_activity_; #ifdef _WIN32 LARGE_INTEGER frequency; // ticks per second LARGE_INTEGER win_time_, win_last_time_; // ticks #else timeval last_time_; #endif bool timer_on_; bool print_on_exit_; }; #endif ================================================ FILE: include/adept/Active.h ================================================ /* Active.h -- Active scalar type for automatic differentiation Copyright (C) 2012-2014 University of Reading Copyright (C) 2015-2018 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. The Active class describes a scalar variable that can participate in expressions to be differentiated. 
It is a generalization of the aReal (or adouble) class in Adept 1.0, which was always double precision; Active takes a template argument T that is any floating-point type. */ #ifndef AdeptActive_H #define AdeptActive_H #include #include #include #include #include #include namespace adept { // --------------------------------------------------------------------- // Definition of Active class // --------------------------------------------------------------------- template class Active : public Expression > { // CONTENTS // 1. Preamble // 2. Constructors // 3. Operators // 4. Public member functions that don't modify the object // 5. Public member functions that modify the object // 6. Protected member functions // 7. Data public: // ------------------------------------------------------------------- // 1. Preamble // ------------------------------------------------------------------- // Static definitions to enable the properties of this type of // expression to be discerned at compile time static const bool is_active = true; static const bool is_lvalue = true; static const int rank = 0; static const int n_active = 1 + internal::is_complex::value; static const int n_arrays = 0; static const int n_scratch = 0; typedef Type T; // Needed so that ADEPT_INIT_REAL_SNAN works // ------------------------------------------------------------------- // 2. Constructors // ------------------------------------------------------------------- // Constructor registers the new Active object with the currently // active stack. Note that this object is not explicitly // initialized with a particular number; the user should not // assume that it is set to zero but should later assign it to a // particular value. Otherwise in the reverse pass the // corresponding gradient will not be set to zero. 
#ifdef ADEPT_INIT_REAL Active() : val_(ADEPT_INIT_REAL), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { } #else Active() : val_(0.0), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { } #endif // Constructor with a passive argument; this constructor is // invoked with either of the following: // aReal x = 1.0; // aReal x(1.0); template Active(const PType& rhs, typename internal::enable_if::value>::type* dummy = 0) : val_(rhs), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { // By pushing this to the statement stack without pushing // anything on to the operation stack we ensure that in the // reverse pass the gradient of this object will be set to zero // after it has been manipulated. This is important because the // gradient entry might be reused. #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // Constructor taking an element from an active array: the value // and gradient_index of the element are provided template Active(const PType& rhs, Index gradient_index) : val_(rhs), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif ADEPT_ACTIVE_STACK->push_rhs(1.0,gradient_index); ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // Constructor with an active argument // Normal copy construction: register the new object then treat // this as an assignment. We need two versions because if we // don't provide the first then the compiler will provide it and // not use the second if Type==AType Active(const Active& rhs) : val_(0.0), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { *this = rhs; } template Active(const Active& rhs) : val_(0.0), gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { *this = rhs; } // Construction with an expression. 
This is primarily used so // that if we define a function func(aReal a), it will also accept // active expressions by implicitly converting them to an aReal. template // explicit Active(const Expression& rhs, typename internal::enable_if::type* dummy = 0) : gradient_index_(ADEPT_ACTIVE_STACK->register_gradient()) { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is enough space in the operation stack ADEPT_ACTIVE_STACK->check_space_static(); #endif // Get the value and push the gradients on to the operation // stack, thereby storing the right-hand-side of the statement val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK); // Push the gradient offset of this object on to the statement // stack, thereby storing the left-hand-side of the statement ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } else { val_ = rhs.scalar_value(); } #endif } // Destructor simply unregisters the object from the stack, // freeing up the gradient index for another object ~Active() { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif ADEPT_ACTIVE_STACK->unregister_gradient(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // ------------------------------------------------------------------- // 3. 
Operators // ------------------------------------------------------------------- // Assignment operator with an inactive variable on the rhs template typename internal::enable_if::value, Active&>::type operator=(const PType& rhs) { val_ = rhs; // Pushing the gradient index on to the statement stack with no // corresponding operations ensures that the gradient will be // set to zero in the reverse pass when it is finished with #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif return *this; } // Assignment operator with an active variable on the rhs: first a // non-template version because otherwise compiler will generate // its own Active& operator=(const Active& rhs) { // Check there is space in the operation stack for one more // entry #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION ADEPT_ACTIVE_STACK->check_space(1); #endif // Same as construction with an expression (defined above) val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK); ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } else { val_ = rhs.scalar_value(); } #endif return *this; } // Assignment operator with an active variable on the rhs template Active& operator=(const Active& rhs) { // Check there is space in the operation stack for one more // entry #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION ADEPT_ACTIVE_STACK->check_space(1); #endif // Same as construction with an expression (defined above) val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK); ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } else { val_ = rhs.scalar_value(); } #endif return *this; } // Assignment operator with an expression on the rhs: very similar // to construction with an expression (defined 
above) template typename internal::enable_if::type operator=(const Expression& rhs) { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION ADEPT_ACTIVE_STACK->check_space_static(); #endif val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK); ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } else { val_ = rhs.scalar_value(); } #endif return *this; } // All the compound assignment operators are unpacked, i.e. a+=b // becomes a=a+b; first for an Expression on the rhs template typename internal::enable_if::type operator+=(const Expression& rhs) { return *this = (*this + rhs); } template typename internal::enable_if::type operator-=(const Expression& rhs) { return *this = (*this - rhs); } template typename internal::enable_if::type operator*=(const Expression& rhs) { return *this = (*this * rhs); } template typename internal::enable_if::type operator/=(const Expression& rhs) { return *this = (*this / rhs); } // And likewise for a passive scalar on the rhs template typename internal::enable_if::value, Active&>::type operator+=(const PType& rhs) { val_ += rhs; return *this; } template typename internal::enable_if::value, Active&>::type operator-=(const PType& rhs) { val_ -= rhs; return *this; } template typename internal::enable_if::value, Active&>::type operator*=(const PType& rhs) { return *this = (*this * rhs); } template typename internal::enable_if::value, Active&>::type operator/=(const PType& rhs) { return *this = (*this / rhs); } // ------------------------------------------------------------------- // 4. 
Public member functions that don't modify the object // ------------------------------------------------------------------- // Get the underlying passive value of this object Type value() const { return val_; } // Get the index of the gradient information for this object const Index& gradient_index() const { return gradient_index_; } // If an expression leads to calc_gradient being called on an // active object, we push the multiplier and the gradient index on // to the operation stack (or 1.0 if no multiplier is specified) template void calc_gradient(Stack& stack, const ExpressionSize&) const { stack.push_rhs(1.0, gradient_index_); } template void calc_gradient(Stack& stack, const MyType& multiplier, const ExpressionSize&) const { stack.push_rhs(multiplier, gradient_index_); } // Set the value of the gradient, for initializing an adjoint; // note that the value of the gradient is not held in the active // object but rather held by the stack template void set_gradient(const MyType& gradient) const { return ADEPT_ACTIVE_STACK->set_gradients(gradient_index_, gradient_index_+1, &gradient); } // Get the value of the gradient, for extracting the adjoint after // calling reverse() on the stack template void get_gradient(MyType& gradient) const { return ADEPT_ACTIVE_STACK->get_gradients(gradient_index_, gradient_index_+1, &gradient); } Type get_gradient() const { Type gradient = 0; ADEPT_ACTIVE_STACK->get_gradients(gradient_index_, gradient_index_+1, &gradient); return gradient; } // For modular codes, some modules may have an existing // Jacobian code and possibly be unsuitable for automatic // differentiation using Adept (e.g. because they are written in // Fortran). In this case, we can use the following two functions // to "wrap" the non-Adept code. 
// Suppose the non-adept code uses the double values from n aReal // objects pointed to by "x" to produce a single double value // "y_val" (to be assigned to an aReal object "y"), plus a pointer // to an array of forward derivatives "dy_dx". Firstly you should // assign the value using simply "y = y_val;", then call // "y.add_derivative_dependence(x, dy_dx, n);" to specify how y // depends on x. A fourth argument "multiplier_stride" may be used // to stride the indexing to the derivatives, in case they are // part of a matrix that is oriented in a different sense. template typename internal::enable_if::value, void>::type add_derivative_dependence(const Active* rhs, const MyReal* multiplier, int n, int multiplier_stride = 1) const { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for n entries ADEPT_ACTIVE_STACK->check_space(n); #endif for (int i = 0; i < n; i++) { Real mult = multiplier[i*multiplier_stride]; if (mult != 0.0) { // For each non-zero multiplier, add a pseudo-operation to // the operation stack ADEPT_ACTIVE_STACK->push_rhs(mult, rhs[i].gradient_index()); } } ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // Suppose the non-Adept code uses double values from n aReal // objects pointed to by "x" and m aReal objects pointed to by "z" // to produce a single double value, plus pointers to arrays of // forward derivatives "dy_dx" and "dy_dz". Firstly, as above, // you should assign the value using simply "y = y_val;", then // call "y.add_derivative_dependence(x, dy_dx, n);" to specify how // y depends on x. To specify also how y depends on z, call // "y.append_derivative_dependence(z, dy_dz, m);". 
template typename internal::enable_if::value, void>::type append_derivative_dependence(const Active* rhs, const MyReal* multiplier, int n, int multiplier_stride = 1) const { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for n entries ADEPT_ACTIVE_STACK->check_space(n); #endif for (int i = 0; i < n; ++i) { Real mult = multiplier[i*multiplier_stride]; if (mult != 0.0) { // For each non-zero multiplier, add a pseudo-operation to // the operation stack ADEPT_ACTIVE_STACK->push_rhs(mult, rhs[i].gradient_index()); } } if (!(ADEPT_ACTIVE_STACK->update_lhs(gradient_index_))) { throw wrong_gradient("Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call" ADEPT_EXCEPTION_LOCATION); } #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // For only one independent variable on the rhs, these two // functions are convenient as they don't involve pointers template void add_derivative_dependence(const T& rhs, Real multiplier) const { ADEPT_ACTIVE_STACK->add_derivative_dependence(gradient_index_, rhs.gradient_index(), multiplier); } template void append_derivative_dependence(const T& rhs, Real multiplier) const { ADEPT_ACTIVE_STACK->append_derivative_dependence(gradient_index_, rhs.gradient_index(), multiplier); } // ------------------------------------------------------------------- // 4.1. 
Public member functions used by other expressions // ------------------------------------------------------------------- bool get_dimensions_(ExpressionSize<0>& dim) const { return true; } std::string expression_string_() const { std::stringstream s; s << "Active(" << val_ << ")"; return s.str(); } bool is_aliased_(const Type* mem1, const Type* mem2) const { return false; } Type value_with_len_(const Index& j, const Index& len) const { return val_; } template void advance_location_(ExpressionSize& loc) const { } template Type value_at_location_(const ExpressionSize& loc) const { return val_; } template Type value_at_location_store_(const ExpressionSize& loc, internal::ScratchVector& scratch) const { return val_; } template Type value_stored_(const ExpressionSize& loc, const internal::ScratchVector& scratch) const { return val_; } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch) const { stack.push_rhs(1.0, gradient_index_); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch, const MyType& multiplier) const { stack.push_rhs(multiplier, gradient_index_); } template void set_location_(const ExpressionSize& i, ExpressionSize& index) const {} // The Stack::independent(x) and Stack::dependent(y) functions add // the gradient_index of objects x and y to std::vector // objects in Stack. Since x and y may be scalars or arrays, this // is best done by delegating to the Active or Array classes. template void push_gradient_indices(std::vector& vec) const { vec.push_back(gradient_index_); } // ------------------------------------------------------------------- // 5. 
Public member functions that modify the object // ------------------------------------------------------------------- // Set the value template void set_value(const MyType& x) { val_ = x; } // For use in creating active references, to get a non-const // reference to the underlying passive data Type& lvalue() { return val_; } // ------------------------------------------------------------------- // 6. Protected member functions // ------------------------------------------------------------------- protected: // ------------------------------------------------------------------- // 7. Data // ------------------------------------------------------------------- private: Type val_; // The numerical value Index gradient_index_; // Index to where the corresponding // gradient will be held during the // adjoint calculation }; // End of definition of Active // --------------------------------------------------------------------- // Helper function for Active class // --------------------------------------------------------------------- // A way of setting the initial values of an array of n aReal // objects without the expense of placing them on the stack template inline void set_values(Active* a, Index n, const Type* data) { for (Index i = 0; i < n; i++) { a[i].set_value(data[i]); } } // Extract the values of an array of n aReal objects template inline void get_values(const Active* a, Index n, Type* data) { for (Index i = 0; i < n; i++) { data[i] = a[i].value(); } } // Set the initial gradients of an array of n aReal objects; this // should be done after the algorithm has been called and before the // Stack::forward or Stack::reverse functions are called template inline void set_gradients(Active* a, Index n, const Type* data) { for (Index i = 0; i < n; i++) { a[i].set_gradient(data[i]); } } // Extract the gradients from an array of aReal objects after the // Stack::forward or Stack::reverse functions have been called template inline void get_gradients(const Active* a, Index n, 
Type* data) { for (Index i = 0; i < n; i++) { a[i].get_gradient(data[i]); } } // Print an active scalar to a stream template inline std::ostream& operator<<(std::ostream& os, const Active& v) { os << v.value(); return os; } // Print an active scalar expression to a stream template inline typename internal::enable_if::type operator<<(std::ostream& os, const Expression& expr) { os << expr.scalar_value(); return os; } namespace internal { // --------------------------------------------------------------------- // Definition of active_scalar // --------------------------------------------------------------------- // Return the active scalar version of Type if it is active, // otherwise just return Type template struct active_scalar { typedef Type type; }; template struct active_scalar { typedef Active type; }; } } // End namespace adept #endif ================================================ FILE: include/adept/ActiveConstReference.h ================================================ /* ActiveConstReference.h -- Const reference to an active element of an array Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. Provide an active scalar type where the data is actually a reference to an element of an array. This enables an active array to be indexed such that the returned value can be used as an r-value and participate in expressions to be differentiated. */ #ifndef AdeptActiveConstReference_H #define AdeptActiveConstReference_H #include #include #include namespace adept { // --------------------------------------------------------------------- // Definition of ActiveConstReference class // --------------------------------------------------------------------- template class ActiveConstReference : public Expression > { // CONTENTS // 1. Preamble // 2. Constructors // 3. Operators // 4. Public member functions that don't modify the object // 5. Public member functions that modify the object // 6. 
Protected member functions // 7. Data public: // ------------------------------------------------------------------- // 1. Preamble // ------------------------------------------------------------------- // Static definitions to enable the properties of this type of // expression to be discerned at compile time static const bool is_active = true; static const int rank = 0; static const int n_active = 1 + internal::is_complex::value; static const int n_arrays = 0; static const int n_scratch = 0; // ------------------------------------------------------------------- // 2. Constructors // ------------------------------------------------------------------- private: // There is only one way to construct an ActiveConstReference, so all // others that would otherwise be generated by the compiler are // made inaccessible ActiveConstReference() { } public: ActiveConstReference(const ActiveConstReference& rhs) : val_(rhs.value()), gradient_index_(rhs.gradient_index()) { } // In order to initialize this object, we pass in the gradient // index from the location in the array as the first argument. ActiveConstReference(const Type& val, Index gradient_index) : val_(val), gradient_index_(gradient_index) { } /* ActiveConstReference(const ActiveConstReference& rhs) : val_(const_cast&>(rhs).lvalue()), gradient_index_(rhs.gradient_index()) { } */ // Destructor does not unregister the object from the stack since // it is not the only reference to it. ~ActiveConstReference() { } // ------------------------------------------------------------------- // 3. Operators // ------------------------------------------------------------------- // Assignment operator with an active variable on the rhs: first a // non-template version because otherwise compiler will generate // its own; must be inaccessible private: ActiveConstReference& operator=(const ActiveConstReference& rhs) { } public: // ------------------------------------------------------------------- // 4. 
Public member functions that don't modify the object // ------------------------------------------------------------------- // Get the underlying passive value of this object const Type& value() const { return val_; } // Get the index of the gradient information for this object const Index& gradient_index() const { return gradient_index_; } // If an expression leads to calc_gradient being called on an // active object, we push the multiplier and the gradient index on // to the operation stack (or 1.0 if no multiplier is specified) template void calc_gradient(Stack& stack, const ExpressionSize&) const { stack.push_rhs(1.0, gradient_index_); } template void calc_gradient(Stack& stack, const MyType& multiplier, const ExpressionSize&) const { stack.push_rhs(multiplier, gradient_index_); } // Set the value of the gradient, for initializing an adjoint; // note that the value of the gradient is not held in the active // object but rather held by the stack template void set_gradient(const MyType& gradient) const { return ADEPT_ACTIVE_STACK->set_gradients(gradient_index_, gradient_index_+1, &gradient); } // Get the value of the gradient, for extracting the adjoint after // calling reverse() on the stack template void get_gradient(MyType& gradient) const { return ADEPT_ACTIVE_STACK->get_gradients(gradient_index_, gradient_index_+1, &gradient); } Type get_gradient() const { Type gradient = 0; ADEPT_ACTIVE_STACK->get_gradients(gradient_index_, gradient_index_+1, &gradient); return gradient; } // For modular codes, some modules may have an existing // Jacobian code and possibly be unsuitable for automatic // differentiation using Adept (e.g. because they are written in // Fortran). In this case, we can use the following two functions // to "wrap" the non-Adept code. 
// Suppose the non-adept code uses the double values from n aReal // objects pointed to by "x" to produce a single double value // "y_val" (to be assigned to an aReal object "y"), plus a pointer // to an array of forward derivatives "dy_dx". Firstly you should // assign the value using simply "y = y_val;", then call // "y.add_derivative_dependence(x, dy_dx, n);" to specify how y // depends on x. A fourth argument "multiplier_stride" may be used // to stride the indexing to the derivatives, in case they are // part of a matrix that is oriented in a different sense. void add_derivative_dependence(const Active* rhs, const Real* multiplier, int n, int multiplier_stride = 1) const { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for n entries ADEPT_ACTIVE_STACK->check_space(n); #endif for (int i = 0; i < n; i++) { Real mult = multiplier[i*multiplier_stride]; if (mult != 0.0) { // For each non-zero multiplier, add a pseudo-operation to // the operation stack ADEPT_ACTIVE_STACK->push_rhs(mult, rhs[i].gradient_index()); } } ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // Suppose the non-Adept code uses double values from n aReal // objects pointed to by "x" and m aReal objects pointed to by "z" // to produce a single double value, plus pointers to arrays of // forward derivatives "dy_dx" and "dy_dz". Firstly, as above, // you should assign the value using simply "y = y_val;", then // call "y.add_derivative_dependence(x, dy_dx, n);" to specify how // y depends on x. To specify also how y depends on z, call // "y.append_derivative_dependence(z, dy_dz, n);". 
void append_derivative_dependence(const Active* rhs, const Real* multiplier, int n, int multiplier_stride = 1) const {
      // Pushes the same kind of pseudo-operations as
      // add_derivative_dependence, but instead of starting a new
      // statement it asks the stack to extend the most recent one via
      // update_lhs, and throws wrong_gradient if that call reports
      // failure.
      //
      // NOTE(review): "Active" appears here without template
      // arguments; angle-bracket text seems to have been dropped from
      // this copy of the file - confirm against upstream.
#ifdef ADEPT_RECORDING_PAUSABLE
      if (ADEPT_ACTIVE_STACK->is_recording()) {
#endif
#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION
        // Check there is space in the operation stack for n entries
        ADEPT_ACTIVE_STACK->check_space(n);
#endif
        for (int i = 0; i < n; i++) {
          Real mult = multiplier[i*multiplier_stride];
          if (mult != 0.0) {
            // For each non-zero multiplier, add a pseudo-operation to
            // the operation stack
            ADEPT_ACTIVE_STACK->push_rhs(mult, rhs[i].gradient_index());
          }
        }
        if (!(ADEPT_ACTIVE_STACK->update_lhs(gradient_index_))) {
          throw wrong_gradient("Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call"
                               ADEPT_EXCEPTION_LOCATION);
        }
#ifdef ADEPT_RECORDING_PAUSABLE
      }
#endif
    }

    // For only one independent variable on the rhs, these two
    // functions are convenient as they don't involve pointers
    //
    // NOTE(review): the parameter list after "template" (presumably
    // declaring T) is absent in this copy of the file.
    template
    void add_derivative_dependence(T& rhs, Real multiplier) const {
      // Forwarded to the stack, identifying both sides by gradient index
      ADEPT_ACTIVE_STACK->add_derivative_dependence(gradient_index_, rhs.gradient_index(), multiplier);
    }
    template
    void append_derivative_dependence(T& rhs, Real multiplier) const {
      ADEPT_ACTIVE_STACK->append_derivative_dependence(gradient_index_, rhs.gradient_index(), multiplier);
    }

    // -------------------------------------------------------------------
    // 4.1.
// Public member functions used by other expressions
    // -------------------------------------------------------------------

    // NOTE(review): in this copy of the file the parameter lists
    // after "template" and the arguments of ExpressionSize,
    // internal::ScratchVector and std::vector are absent
    // (angle-bracket text appears to have been dropped). The
    // declarations are kept exactly as found.

    // Always reports success; "dim" is left untouched
    bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }

    // Text form of this object, containing the current value
    std::string expression_string_() const {
      std::stringstream s;
      s << "ActiveConstReference(" << val_ << ")";
      return s.str();
    }

    // True if the address of the referenced value lies in the
    // closed range [mem1, mem2]
    bool is_aliased_(const Type* mem1, const Type* mem2) const {
      return &val_ >= mem1 && &val_ <= mem2;
    }

    // The following accessors ignore their location/index arguments
    // and return the single referenced value
    Type value_with_len_(const Index& j, const Index& len) const
    { return val_; }

    template
    void advance_location_(ExpressionSize& loc) const { }

    template
    Type value_at_location_(const ExpressionSize& loc) const
    { return val_; }

    template
    Type value_at_location_store_(const ExpressionSize& loc,
                                  internal::ScratchVector& scratch) const
    { return val_; }

    template
    Type value_stored_(const ExpressionSize& loc,
                       const internal::ScratchVector& scratch) const
    { return val_; }

    // Push this object's gradient index on to the operation stack
    // with a multiplier of 1.0 ...
    template
    void calc_gradient_(Stack& stack,
                        const ExpressionSize& loc,
                        const internal::ScratchVector& scratch) const {
      stack.push_rhs(1.0, gradient_index_);
    }

    // ... or with the multiplier supplied by the caller
    template
    void calc_gradient_(Stack& stack,
                        const ExpressionSize& loc,
                        const internal::ScratchVector& scratch,
                        const MyType& multiplier) const {
      stack.push_rhs(multiplier, gradient_index_);
    }

    template
    void set_location_(const ExpressionSize& i,
                       ExpressionSize& index) const {}

    // The Stack::independent(x) and Stack::dependent(y) functions add
    // the gradient_index of objects x and y to std::vector
    // objects in Stack. Since x and y may be scalars or arrays, this
    // is best done by delegating to the ActiveConstReference or Array classes.
    template
    void push_gradient_indices(std::vector& vec) const {
      vec.push_back(gradient_index_);
    }

    // -------------------------------------------------------------------
    // 5.
// Public member functions that modify the object
    // -------------------------------------------------------------------

    // Set the value
    //
    // NOTE(review): val_ is declared "const Type&" in the data
    // section below, so this assignment writes through a reference
    // to const; that looks ill-formed should the function ever be
    // instantiated - confirm whether set_value is meant to exist on
    // the const reference class. The parameter list after "template"
    // (presumably declaring MyType) is absent in this copy.
    template
    void set_value(const MyType& x) { val_ = x; }

    // -------------------------------------------------------------------
    // 6. Protected member functions
    // -------------------------------------------------------------------
  protected:

    // -------------------------------------------------------------------
    // 7. Data
    // -------------------------------------------------------------------
  private:
    const Type& val_;                 // Reference to the numerical value
    Index gradient_index_;            // Index to where the corresponding
                                      // gradient will be held during the
                                      // adjoint calculation
  }; // End of definition of ActiveConstReference

  // ---------------------------------------------------------------------
  // Helper function for ActiveConstReference class
  // ---------------------------------------------------------------------

  // Stream the passive value only.
  // NOTE(review): template parameter list and the argument of
  // ActiveConstReference are absent in this copy of the file.
  template
  inline
  std::ostream&
  operator<<(std::ostream& os, const ActiveConstReference& v)
  {
    os << v.value();
    return os;
  }

  namespace internal {
    // ---------------------------------------------------------------------
    // active_const_reference
    // ---------------------------------------------------------------------

    // Return the active reference version of Type if it is active,
    // otherwise just return Type&
    //
    // NOTE(review): the two declarations below now read identically
    // because their template parameter/specialization lists are
    // absent in this copy; one is presumably the primary template and
    // the other a specialization - restore from upstream.
    template
    struct active_const_reference {
      typedef const Type& type;
    };
    template
    struct active_const_reference {
      typedef ActiveConstReference type;
    };
  }

} // End namespace adept

#endif
================================================ FILE: include/adept/ActiveReference.h ================================================
/* ActiveReference.h -- Reference to an active element of an array

    Copyright (C) 2015-2018 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

   Provide an active scalar type where the data is actually a
   reference to an element of array.
This enables an active array to be indexed such that the returned
   value can be used as an l-value and participate in expressions to
   be differentiated.
*/

#ifndef AdeptActiveReference_H
#define AdeptActiveReference_H

// NOTE(review): the header names of the three includes below, the
// template parameter list of the class and the template arguments of
// Expression and internal::is_complex are absent in this copy of the
// file (angle-bracket text appears to have been dropped). Everything
// is kept exactly as found; restore from upstream before building.
#include
#include
#include

namespace adept {

  // ---------------------------------------------------------------------
  // Definition of ActiveReference class
  // ---------------------------------------------------------------------
  template
  class ActiveReference : public Expression > {
    // CONTENTS
    // 1. Preamble
    // 2. Constructors
    // 3. Operators
    // 4. Public member functions that don't modify the object
    // 5. Public member functions that modify the object
    // 6. Protected member functions
    // 7. Data

  public:
    // -------------------------------------------------------------------
    // 1. Preamble
    // -------------------------------------------------------------------

    // Static definitions to enable the properties of this type of
    // expression to be discerned at compile time
    static const bool is_active = true;
    static const int  rank      = 0;
    static const int  n_active  = 1 + internal::is_complex::value;
    static const int  n_arrays  = 0;
    static const int  n_scratch = 0;

    // -------------------------------------------------------------------
    // 2. Constructors
    // -------------------------------------------------------------------
  private:
    // There is only one way to construct an ActiveReference, so all
    // others that would otherwise be generated by the compiler are
    // made inaccessible
    ActiveReference() { }
    ActiveReference(ActiveReference& rhs)
      : val_(rhs.lvalue()), gradient_index_(rhs.gradient_index()) { }

  public:
    // In order to initialize this object, we pass in the gradient
    // index from the location in the array as the second argument.
ActiveReference(Type& val, Index gradient_index)
      : val_(val), gradient_index_(gradient_index) { }

    // NOTE(review): in this flattened copy a lone "//" precedes the
    // constructor below, so it cannot be told from the text alone
    // whether that was an empty comment line or whether the
    // constructor was commented out. It is laid out here as an empty
    // comment line followed by live code - confirm against upstream.
    // The argument of const_cast is also absent in this copy.
    //
    ActiveReference(const ActiveReference& rhs)
      : val_(const_cast&>(rhs).lvalue()),
        gradient_index_(rhs.gradient_index()) { }

    // Destructor does not unregister the object from the stack since
    // it is not the only reference to it.
    ~ActiveReference() { }

    // -------------------------------------------------------------------
    // 3. Operators
    // -------------------------------------------------------------------

    // NOTE(review): throughout this section the parameter lists after
    // "template" and the arguments of internal::enable_if, Active,
    // Expression and check_space_static are absent in this copy of
    // the file; the declarations are kept exactly as found.

    // Assignment operator with an inactive variable on the rhs
    template
    typename internal::enable_if::value,
                                 ActiveReference&>::type
    operator=(const PType& rhs) {
      val_ = rhs;
      // Pushing the gradient index on to the statement stack with no
      // corresponding operations ensures that the gradient will be
      // set to zero in the reverse pass when it is finished with
#ifdef ADEPT_RECORDING_PAUSABLE
      if (ADEPT_ACTIVE_STACK->is_recording()) {
#endif
        ADEPT_ACTIVE_STACK->push_lhs(gradient_index_);
#ifdef ADEPT_RECORDING_PAUSABLE
      }
#endif
      return *this;
    }

    // Assignment operator with an active variable on the rhs: first a
    // non-template version because otherwise compiler will generate
    // its own
    ActiveReference& operator=(const ActiveReference& rhs) {
      // Check there is space in the operation stack for one more
      // entry
#ifdef ADEPT_RECORDING_PAUSABLE
      if (ADEPT_ACTIVE_STACK->is_recording()) {
#endif
#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION
        ADEPT_ACTIVE_STACK->check_space(1);
#endif
        val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);
        ADEPT_ACTIVE_STACK->push_lhs(gradient_index_);
#ifdef ADEPT_RECORDING_PAUSABLE
      }
      else {
        // Not recording: copy the value only
        val_ = rhs.scalar_value();
      }
#endif
      return *this;
    }

    // Assignment operator with an active variable on the rhs
    template
    ActiveReference& operator=(const Active& rhs) {
      // Check there is space in the operation stack for one more
      // entry
#ifdef ADEPT_RECORDING_PAUSABLE
      if (ADEPT_ACTIVE_STACK->is_recording()) {
#endif
#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION
        ADEPT_ACTIVE_STACK->check_space(1);
#endif
        // Same as construction with an expression (defined above)
        val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);
        ADEPT_ACTIVE_STACK->push_lhs(gradient_index_);
#ifdef ADEPT_RECORDING_PAUSABLE
      }
      else {
        val_ = rhs.scalar_value();
      }
#endif
      return *this;
    }

    // Assignment operator with an expression on the rhs
    template
    typename internal::enable_if::type
    operator=(const Expression& rhs) {
#ifdef ADEPT_RECORDING_PAUSABLE
      if (ADEPT_ACTIVE_STACK->is_recording()) {
#endif
#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION
        ADEPT_ACTIVE_STACK->check_space_static();
#endif
        val_ = rhs.scalar_value_and_gradient(*ADEPT_ACTIVE_STACK);
        ADEPT_ACTIVE_STACK->push_lhs(gradient_index_);
#ifdef ADEPT_RECORDING_PAUSABLE
      }
      else {
        val_ = rhs.scalar_value();
      }
#endif
      return *this;
    }

    // All the compound assignment operators are unpacked, i.e. a+=b
    // becomes a=a+b; first for an Expression on the rhs
    template
    typename internal::enable_if::type
    operator+=(const Expression& rhs) {
      return *this = (*this + rhs);
    }
    template
    typename internal::enable_if::type
    operator-=(const Expression& rhs) {
      return *this = (*this - rhs);
    }
    template
    typename internal::enable_if::type
    operator*=(const Expression& rhs) {
      return *this = (*this * rhs);
    }
    template
    typename internal::enable_if::type
    operator/=(const Expression& rhs) {
      return *this = (*this / rhs);
    }

    // And likewise for a passive scalar on the rhs
    //
    // Note that += and -= below update the value in place and push
    // nothing on to the stack, whereas *= and /= are unpacked into an
    // assignment as above.
    template
    typename internal::enable_if::value,
                                 ActiveReference&>::type
    operator+=(const PType& rhs) {
      val_ += rhs;
      return *this;
    }
    template
    typename internal::enable_if::value,
                                 ActiveReference&>::type
    operator-=(const PType& rhs) {
      val_ -= rhs;
      return *this;
    }
    template
    typename internal::enable_if::value,
                                 ActiveReference&>::type
    operator*=(const PType& rhs) {
      return *this = (*this * rhs);
    }
    template
    typename internal::enable_if::value,
                                 ActiveReference&>::type
    operator/=(const PType& rhs) {
      return *this = (*this / rhs);
    }

    //
// -------------------------------------------------------------------
    // 4. Public member functions that don't modify the object
    // -------------------------------------------------------------------

    // NOTE(review): in this copy of the file the text that should
    // follow each "template" keyword, and the argument of
    // "ExpressionSize", are absent (angle-bracket text appears to
    // have been dropped). The declarations are kept exactly as found;
    // restore the parameter lists from upstream before building.

    // Get the underlying passive value of this object (returned by
    // value)
    Type value() const { return val_; }

    // Get the index of the gradient information for this object
    const Index& gradient_index() const { return gradient_index_; }

    // If an expression leads to calc_gradient being called on an
    // active object, we push the multiplier and the gradient index on
    // to the operation stack (or 1.0 if no multiplier is specified)
    template
    void calc_gradient(Stack& stack, const ExpressionSize&) const {
      stack.push_rhs(1.0, gradient_index_);
    }
    template
    void calc_gradient(Stack& stack, const MyType& multiplier, const ExpressionSize&) const {
      stack.push_rhs(multiplier, gradient_index_);
    }

    // Set the value of the gradient, for initializing an adjoint;
    // note that the value of the gradient is not held in the active
    // object but rather held by the stack
    template
    void set_gradient(const MyType& gradient) const {
      // The half-open index range [gradient_index_, gradient_index_+1)
      // selects the single gradient belonging to this object
      return ADEPT_ACTIVE_STACK->set_gradients(gradient_index_, gradient_index_+1, &gradient);
    }

    // Get the value of the gradient, for extracting the adjoint after
    // calling reverse() on the stack
    template
    void get_gradient(MyType& gradient) const {
      return ADEPT_ACTIVE_STACK->get_gradients(gradient_index_, gradient_index_+1, &gradient);
    }
    // Overload that returns the gradient by value; the local is set
    // to 0 before the stack is asked to fill it in
    Type get_gradient() const {
      Type gradient = 0;
      ADEPT_ACTIVE_STACK->get_gradients(gradient_index_, gradient_index_+1, &gradient);
      return gradient;
    }

    // For modular codes, some modules may have an existing
    // Jacobian code and possibly be unsuitable for automatic
    // differentiation using Adept (e.g. because they are written in
    // Fortran). In this case, we can use the following two functions
    // to "wrap" the non-Adept code.
// Suppose the non-adept code uses the double values from n aReal // objects pointed to by "x" to produce a single double value // "y_val" (to be assigned to an aReal object "y"), plus a pointer // to an array of forward derivatives "dy_dx". Firstly you should // assign the value using simply "y = y_val;", then call // "y.add_derivative_dependence(x, dy_dx, n);" to specify how y // depends on x. A fourth argument "multiplier_stride" may be used // to stride the indexing to the derivatives, in case they are // part of a matrix that is oriented in a different sense. void add_derivative_dependence(const Active* rhs, const Real* multiplier, int n, int multiplier_stride = 1) const { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for n entries ADEPT_ACTIVE_STACK->check_space(n); #endif for (int i = 0; i < n; i++) { Real mult = multiplier[i*multiplier_stride]; if (mult != 0.0) { // For each non-zero multiplier, add a pseudo-operation to // the operation stack ADEPT_ACTIVE_STACK->push_rhs(mult, rhs[i].gradient_index()); } } ADEPT_ACTIVE_STACK->push_lhs(gradient_index_); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // Suppose the non-Adept code uses double values from n aReal // objects pointed to by "x" and m aReal objects pointed to by "z" // to produce a single double value, plus pointers to arrays of // forward derivatives "dy_dx" and "dy_dz". Firstly, as above, // you should assign the value using simply "y = y_val;", then // call "y.add_derivative_dependence(x, dy_dx, n);" to specify how // y depends on x. To specify also how y depends on z, call // "y.append_derivative_dependence(z, dy_dz, n);". 
template
    void append_derivative_dependence(const Active* rhs, const Real* multiplier, int n, int multiplier_stride = 1) const {
      // Pushes the same kind of pseudo-operations as
      // add_derivative_dependence, but instead of starting a new
      // statement it asks the stack to extend the most recent one via
      // update_lhs, and throws wrong_gradient if that call reports
      // failure.
      //
      // NOTE(review): the parameter list after "template" and the
      // template arguments of "Active" are absent in this copy of the
      // file (angle-bracket text appears to have been dropped) -
      // confirm against upstream.
#ifdef ADEPT_RECORDING_PAUSABLE
      if (ADEPT_ACTIVE_STACK->is_recording()) {
#endif
#ifndef ADEPT_MANUAL_MEMORY_ALLOCATION
        // Check there is space in the operation stack for n entries
        ADEPT_ACTIVE_STACK->check_space(n);
#endif
        for (int i = 0; i < n; i ++) {
          Real mult = multiplier[i*multiplier_stride];
          if (mult != 0.0) {
            // For each non-zero multiplier, add a pseudo-operation to
            // the operation stack
            ADEPT_ACTIVE_STACK->push_rhs(mult, rhs[i].gradient_index());
          }
        }
        if (!(ADEPT_ACTIVE_STACK->update_lhs(gradient_index_))) {
          throw wrong_gradient("Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call"
                               ADEPT_EXCEPTION_LOCATION);
        }
#ifdef ADEPT_RECORDING_PAUSABLE
      }
#endif
    }

    // For only one independent variable on the rhs, these two
    // functions are convenient as they don't involve pointers
    template
    void add_derivative_dependence(T& rhs, Real multiplier) const {
      // Forwarded to the stack, identifying both sides by gradient index
      ADEPT_ACTIVE_STACK->add_derivative_dependence(gradient_index_, rhs.gradient_index(), multiplier);
    }
    template
    void append_derivative_dependence(T& rhs, Real multiplier) const {
      ADEPT_ACTIVE_STACK->append_derivative_dependence(gradient_index_, rhs.gradient_index(), multiplier);
    }

    // -------------------------------------------------------------------
    // 4.1.
// Public member functions used by other expressions
    // -------------------------------------------------------------------

    // NOTE(review): in this copy of the file the parameter lists
    // after "template" and the arguments of ExpressionSize,
    // internal::ScratchVector and std::vector are absent
    // (angle-bracket text appears to have been dropped). The
    // declarations are kept exactly as found.

    // Always reports success; "dim" is left untouched
    bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }

    // Text form of this object, containing the current value
    std::string expression_string_() const {
      std::stringstream s;
      s << "ActiveReference(" << val_ << ")";
      return s.str();
    }

    // True if the address of the referenced value lies in the
    // closed range [mem1, mem2]
    bool is_aliased_(const Type* mem1, const Type* mem2) const {
      return &val_ >= mem1 && &val_ <= mem2;
    }

    // The following accessors ignore their location/index arguments
    // and return the single referenced value
    Type value_with_len_(const Index& j, const Index& len) const
    { return val_; }

    template
    void advance_location_(ExpressionSize& loc) const { }

    template
    Type value_at_location_(const ExpressionSize& loc) const
    { return val_; }

    template
    Type value_at_location_store_(const ExpressionSize& loc,
                                  internal::ScratchVector& scratch) const
    { return val_; }

    template
    Type value_stored_(const ExpressionSize& loc,
                       const internal::ScratchVector& scratch) const
    { return val_; }

    // Push this object's gradient index on to the operation stack
    // with a multiplier of 1.0 ...
    template
    void calc_gradient_(Stack& stack,
                        const ExpressionSize& loc,
                        const internal::ScratchVector& scratch) const {
      stack.push_rhs(1.0, gradient_index_);
    }

    // ... or with the multiplier supplied by the caller
    template
    void calc_gradient_(Stack& stack,
                        const ExpressionSize& loc,
                        const internal::ScratchVector& scratch,
                        const MyType& multiplier) const {
      stack.push_rhs(multiplier, gradient_index_);
    }

    template
    void set_location_(const ExpressionSize& i,
                       ExpressionSize& index) const {}

    // The Stack::independent(x) and Stack::dependent(y) functions add
    // the gradient_index of objects x and y to std::vector
    // objects in Stack. Since x and y may be scalars or arrays, this
    // is best done by delegating to the ActiveReference or Array classes.
    template
    void push_gradient_indices(std::vector& vec) const {
      vec.push_back(gradient_index_);
    }

    // -------------------------------------------------------------------
    // 5.
// Public member functions that modify the object
    // -------------------------------------------------------------------

    // Set the value; nothing is pushed on to the stack here.
    // NOTE(review): the parameter list after "template" (presumably
    // declaring MyType) is absent in this copy of the file.
    template
    void set_value(const MyType& x) { val_ = x; }

    // -------------------------------------------------------------------
    // 6. Protected member functions
    // -------------------------------------------------------------------
  protected:
    // For use in creating active references, to get a non-const
    // reference to the underlying passive data
    Type& lvalue() { return val_; }

    // -------------------------------------------------------------------
    // 7. Data
    // -------------------------------------------------------------------
  private:
    Type& val_;                       // Reference to the numerical value
    Index gradient_index_;            // Index to where the corresponding
                                      // gradient will be held during the
                                      // adjoint calculation
  }; // End of definition of ActiveReference

  // ---------------------------------------------------------------------
  // Helper function for ActiveReference class
  // ---------------------------------------------------------------------

  // Stream the passive value only.
  // NOTE(review): template parameter list and the argument of
  // ActiveReference are absent in this copy of the file.
  template
  inline
  std::ostream&
  operator<<(std::ostream& os, const ActiveReference& v)
  {
    os << v.value();
    return os;
  }

  namespace internal {
    // ---------------------------------------------------------------------
    // active_reference
    // ---------------------------------------------------------------------

    // Return the active reference version of Type if it is active,
    // otherwise just return Type&
    //
    // NOTE(review): the two declarations below now read identically
    // because their template parameter/specialization lists are
    // absent in this copy; one is presumably the primary template and
    // the other a specialization - restore from upstream.
    template
    struct active_reference {
      typedef Type& type;
    };
    template
    struct active_reference {
      typedef ActiveReference type;
    };
  }

} // End namespace adept

#endif
================================================ FILE: include/adept/Allocator.h ================================================
/* Allocator.h -- Allocates elements to arrays

    Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
*/

#ifndef AdeptAllocator_H
#define AdeptAllocator_H 1

// NOTE(review): in this copy of the file the include header names,
// the parameter lists after "template", and the template arguments
// of Array, Expression, ExpressionSize, enable_if (in places),
// complete_row and internal::Allocator are absent (angle-bracket
// text appears to have been dropped). All code is kept exactly as
// found; restore the missing text from upstream before building.
#include
//#include

namespace adept {
  namespace internal {

    // Helper that fills "array_" element by element as values are
    // streamed in with "<<" or ","; coords_ tracks where the next
    // value goes and obj_size_ the size of the objects seen so far on
    // the current row.
    template
    class Allocator {
    public:
      // Create an allocator object and copy the first object in it
      template
      Allocator(A& array, const F& first_arg)
        : array_(array), size_(array.dimensions()),
          // filled_size_(0),
          obj_size_(0), coords_(0), scalar_size_(1) {
        *this << first_arg;
      }

      // Copy a scalar into the array
      template
      typename enable_if::value,Allocator&>::type
      operator<<(const T& x) {
        if (coords_[Rank-1] >= size_[Rank-1]) {
          // We have reached the end of the array: move to next row
          complete_row();
          // All dimensions of this object are of length 1
          obj_size_.set_all(1);
        }
        else if (coords_[Rank-1] == 0) {
          // At the beginning of a row: set the size of the template
          // object to that of a scalar
          obj_size_ = scalar_size_;
        }
        else if (obj_size_ != scalar_size_) {
          // The template object size is not the same as a scalar,
          // indicating that dissimilar objects have been concatenated
          // in a row
          throw index_out_of_bounds("Scalar added to array with \"<<\" when previous objects on row were not scalar"
                                    ADEPT_EXCEPTION_LOCATION);
        }
        // Add the scalar to the array and increment the final index
        array_.get_lvalue(coords_) = x;
        ++coords_[Rank-1];
        return *this;
      }

      // Copy an expression into the array
      template
      typename enable_if<(E::rank <= Rank), Allocator&>::type
      operator<<(const Expression& x) {
        // Evaluate expression and store in an Array of the same rank
        // (if Expression is already an Array then this will make a
        // shallow copy). Ought to check for aliasing.
        const Array xx(x.cast());
        ExpressionSize leading_dim;
        // leading_dim.copy_dissimilar(xx.dimensions());
        partial_copy(xx.dimensions(), leading_dim);
        if (coords_[Rank-1] >= size_[Rank-1]) {
          // We have reached the end of the array: move to next row
          complete_row();
        }
        if (coords_[Rank-1] == 0) {
          // First object on the row: it defines the size that later
          // objects on the same row are compared against
          partial_copy(xx.dimensions(), obj_size_);
        }
        else if (obj_size_ != leading_dim) {
          // The template object size is not the same as the current
          // array, indicating that dissimilar objects have been
          // concatenated in a row
          throw index_out_of_bounds("Expression added to array with \"<<\" does not match size of previous objects on row"
                                    ADEPT_EXCEPTION_LOCATION);
        }
        // Add the object to the array and increment the final index
        ExpressionSize i_lhs(coords_);
        ExpressionSize i_rhs(0);
        int rank;
        // advance_index leaves rank negative once every element of xx
        // has been visited, which ends the loop
        do {
          array_.get_lvalue(i_lhs) = xx.get_rvalue(i_rhs);
          advance_index(rank, i_lhs, i_rhs, xx.dimensions());
        } while (rank >= 0);
        coords_[Rank-1] += xx.dimension(E::rank-1);
        return *this;
      }

      // Step i_rhs on to the next element of the source object (last
      // dimension varying fastest) and move i_lhs correspondingly,
      // its dimensions being offset by Rank-RhsRank. On return "rank"
      // holds the dimension that was incremented, or -1 if every
      // dimension wrapped back to zero.
      template
      void advance_index(int& rank, ExpressionSize& i_lhs, ExpressionSize& i_rhs,
                         const ExpressionSize& size) const {
        rank = RhsRank;
        while (--rank >= 0) {
          if (++i_rhs[rank] >= size[rank]) {
            // This dimension is exhausted: rewind it on both sides
            // and carry into the next dimension
            i_rhs[rank] = 0;
            i_lhs[rank+(Rank-RhsRank)] -= (size[rank]-1);
          }
          else {
            ++i_lhs[rank+(Rank-RhsRank)];
            break;
          }
        }
      }

      // Comma operator does the same as "<<" operator
      template
      typename enable_if::value,Allocator&>::type
      operator,(const T& x) {
        return *this << x;
      }

    protected:
      // A vector should never complete a row as this indicates it has
      // been overfilled
      template
      typename enable_if<(MyRank <= 1), void>::type
      complete_row() {
        throw index_out_of_bounds("Row overflow in filling Vector with \"<<\""
                                  ADEPT_EXCEPTION_LOCATION);
      }

      // Multi-dimensional arrays: move to next row, checking which
      // dimensions have been filled
      //
      // Searches from the second-last dimension towards the first for
      // one in which another object of size obj_size_ still fits,
      // advances coords_ there and zeroes every later coordinate;
      // throws if no such dimension exists. obj_size_ is reset to all
      // zeros before returning.
      template
      typename enable_if<(MyRank > 1), void>::type
      complete_row() {
        int next_dim = Rank-2;
        while (next_dim >= 0) {
          if (coords_[next_dim]+obj_size_[next_dim] < size_[next_dim]) {
            // filled_size_[next_dim] += obj_size_[next_dim];
            coords_[next_dim] += obj_size_[next_dim];
            for (int i = next_dim+1; i < Rank; ++i) {
              coords_[i] = 0;
            }
            break;
          }
          --next_dim;
        }
        if (next_dim < 0) {
          throw index_out_of_bounds("Dimension overflow in filling array with \"<<\""
                                    ADEPT_EXCEPTION_LOCATION);
        }
        obj_size_.set_all(0);
      }

      // Fill "to" from the dimensions of an object of rank MyRank:
      // the first Rank-MyRank entries are set to 1 and the following
      // ones are copied from the leading entries of "from"; the last
      // entry of "from" is not copied.
      template
      typename enable_if<(MyRank > 1), void>::type
      partial_copy(const ExpressionSize& from, ExpressionSize& to) const {
        for (int i = 0; i < Rank-MyRank; ++i) {
          to[i] = 1;
        }
        for (int i = Rank-MyRank; i < Rank-1; ++i) {
          to[i] = from[i+(MyRank-Rank)];
        }
      }

      // For MyRank <= 1 every entry of "to" is simply set to 1
      template
      typename enable_if<(MyRank <= 1), void>::type
      partial_copy(const ExpressionSize& from, ExpressionSize& to) const {
        to.set_all(1);
      }

    protected:
      A& array_;                        // Array being filled
      const ExpressionSize size_;       // Dimensions of array_ at construction
      // ExpressionSize filled_size_;
      ExpressionSize obj_size_;         // Size of objects on the current row
      ExpressionSize coords_;           // Location of the next element to write
      const ExpressionSize scalar_size_; // Constructed from 1; the size a scalar is compared against
    };
  }

  // Allow object to be filled with "A << 1, 2, 3";
  // Throws empty_array if the array is empty.
  template
  internal::Allocator >
  operator<<(Array& array, const E& x) {
    if (array.empty()) {
      throw empty_array("Attempt to fill empty array with \"<<\""
                        ADEPT_EXCEPTION_LOCATION);
    }
    return internal::Allocator >(array, x);
  }

}

#endif
================================================ FILE: include/adept/Array.h ================================================
/* Array.h -- active or inactive Array of arbitrary rank

    Copyright (C) 2014-2021 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

   The Array class has functionality modelled on Fortran-90 arrays -
   they can have a rank up to 7 (above will work, but some forms of
   indexing these arrays will not work).
*/

#ifndef AdeptArray_H
#define AdeptArray_H 1

// NOTE(review): the header names of every include below and the
// parameter lists of the forward-declared templates are absent in
// this copy of the file (angle-bracket text appears to have been
// dropped). Everything is kept exactly as found; restore from
// upstream before building.
#include
#include
#include
#include
#include

#ifdef ADEPT_CXX11_FEATURES
#include
#endif

#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace adept {

  // Styles available for writing arrays to a stream
  enum ArrayPrintStyle {
    PRINT_STYLE_PLAIN,
    PRINT_STYLE_CSV,
    PRINT_STYLE_CURLY,
    PRINT_STYLE_MATLAB
  };

  // Storage order of a matrix
  enum MatrixStorageOrder {
    ROW_MAJOR=0, COL_MAJOR=1
  };

  // Forward declarations to enable diag_matrix
  template
  class SpecialMatrix;
  namespace internal {
    template struct BandEngine;
  }

  // Forward declaration to enable linking at construction and via
  // link to FixedArray
  template
  class FixedArray;

  namespace internal {

    // -------------------------------------------------------------------
    // Global variables
    // -------------------------------------------------------------------

    // The following global variables affect the behaviour of the
    // Array class, and are modified using set_*

    // This is "true" by default: row-major is the normal C/C++
    // convention
    extern bool array_row_major_order;
    // When arrays are sent to a stream the dimensions can be grouped
    // with curly brackets
    // extern bool array_print_curly_brackets;

    // Variables describing how arrays are written to a stream
    extern ArrayPrintStyle array_print_style;
    extern std::string vector_separator;
    extern std::string vector_print_before;
    extern std::string vector_print_after;
    extern std::string array_opening_bracket;
    extern std::string array_closing_bracket;
    extern std::string array_contiguous_separator;
    extern std::string array_non_contiguous_separator;
    extern std::string array_print_before;
    extern std::string array_print_after;
    extern std::string array_print_empty_before;
    extern std::string array_print_empty_after;
    extern bool array_print_indent;
    extern bool array_print_empty_rank;

    // Forward declaration to enable Array::where()
    // template class Where;

    // -------------------------------------------------------------------
    // Helper classes
    //
// -------------------------------------------------------------------

    // The following are used by expression_string()
    //
    // Each helper's name() returns the label used for an array of the
    // given rank and activeness; the explicit specializations cover
    // rank 1 ("Vector"/"aVector") and rank 2 ("Matrix"/"aMatrix").
    //
    // NOTE(review): the first two declarations now read identically
    // because their template parameter/specialization lists are
    // absent in this copy of the file (angle-bracket text appears to
    // have been dropped); presumably one is the primary template and
    // the other a partial specialization for active arrays - restore
    // from upstream.
    template
    struct array_helper            { const char* name() { return "Array";  } };
    template
    struct array_helper            { const char* name() { return "aArray"; } };

    template <>
    struct array_helper<1,false>   { const char* name() { return "Vector"; } };
    template <>
    struct array_helper<1,true>    { const char* name() { return "aVector"; } };

    template <>
    struct array_helper<2,false>   { const char* name() { return "Matrix"; } };
    template <>
    struct array_helper<2,true>    { const char* name() { return "aMatrix"; } };

  } // End namespace internal

  // -------------------------------------------------------------------
  // Definition of Array class
  // -------------------------------------------------------------------
  // NOTE(review): the template parameter list of Array and the
  // template arguments of Expression, internal::GradientIndex,
  // internal::is_complex and Packet are absent in this copy of the
  // file; the declarations are kept exactly as found.
  template
  class Array
    : public Expression >,
      protected internal::GradientIndex {

  public:
    // -------------------------------------------------------------------
    // Array: 1. Static Definitions
    // -------------------------------------------------------------------

    // The Expression base class needs access to some protected member
    // functions in section 5
    friend struct Expression >;

    // Static definitions to enable the properties of this type of
    // expression to be discerned at compile time
    static const bool is_active  = IsActive;
    static const bool is_lvalue  = true;
    static const int  rank       = Rank;
    static const int  n_active   = IsActive * (1 + internal::is_complex::value);
    static const int  n_scratch  = 0;
    static const int  n_arrays   = 1;
    static const bool is_vectorizable = Packet::is_vectorized;

    // -------------------------------------------------------------------
    // Array: 2.
// Constructors
    // -------------------------------------------------------------------

    // NOTE(review): in this copy of the file the template arguments
    // of std::numeric_limits, ExpressionSize, Storage,
    // internal::GradientIndex and const_cast are absent
    // (angle-bracket text appears to have been dropped). All code is
    // kept exactly as found; restore from upstream before building.

    // Initialize an empty array
    Array() : data_(0), storage_(0), dimensions_(0)
    { ADEPT_STATIC_ASSERT(!(std::numeric_limits::is_integer
            && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS); }

    // Initialize an array with specified size
    Array(const Index* dims) : storage_(0) { resize(dims); }
    Array(const ExpressionSize& dims) : storage_(0) { resize(dims); }

    // A way to only enable construction if the correct number of
    // arguments is provided (resize_ is only defined for x==Rank)
    Array(Index m0) : storage_(0) { resize_<1>(m0); }
    Array(Index m0, Index m1) : storage_(0) { resize_<2>(m0,m1); }
    Array(Index m0, Index m1, Index m2) : storage_(0)
    { resize_<3>(m0,m1,m2); }
    Array(Index m0, Index m1, Index m2, Index m3) : storage_(0)
    { resize_<4>(m0,m1,m2,m3); }
    Array(Index m0, Index m1, Index m2, Index m3, Index m4) : storage_(0)
    { resize_<5>(m0,m1,m2,m3,m4); }
    Array(Index m0, Index m1, Index m2, Index m3, Index m4, Index m5)
      : storage_(0)
    { resize_<6>(m0,m1,m2,m3,m4,m5); }
    Array(Index m0, Index m1, Index m2, Index m3, Index m4, Index m5,
          Index m6) : storage_(0)
    { resize_<7>(m0,m1,m2,m3,m4,m5,m6); }

    // A way to directly create arrays, needed when subsetting
    // other arrays
    Array(Type* data, Storage* s, const ExpressionSize& dims,
          const ExpressionSize& offset)
      : data_(data), storage_(s), dimensions_(dims), offset_(offset) {
      if (storage_) {
        // Register as one more user of the shared storage and take
        // the gradient index from it
        storage_->add_link();
        internal::GradientIndex::set(data_, storage_);
      }
      else {
        // Active arrays need a gradient index so it is an error for
        // them to get to this point
        internal::GradientIndex::assert_inactive();
      }
    }

    // Similar to the above, but with the gradient index supplied explicitly,
    // needed when an active FixedArray is being sliced, which
    // produces an active Array
    //
    // The data pointer stored is data0 with constness cast away and
    // advanced by data_offset; no Storage object is linked.
    Array(const Type* data0, Index data_offset, const ExpressionSize& dims,
          const ExpressionSize& offset, Index gradient_index0)
      : internal::GradientIndex(gradient_index0, data_offset),
        data_(const_cast(data0)+data_offset), storage_(0), dimensions_(dims),
        offset_(offset) { }

    // Initialize an array pointing at existing data: the fact that
    // storage_ is a null pointer is used to convey the information
    // that it is not necessary to deallocate the data when this array
    // is destructed
    Array(Type* data, const ExpressionSize& dims)
      : data_(data), storage_(0), dimensions_(dims) {
      ADEPT_STATIC_ASSERT(!IsActive, CANNOT_CONSTRUCT_ACTIVE_ARRAY_WITHOUT_GRADIENT_INDEX);
      // Active arrays need a gradient index so it is an error for
      // them to get to this point
      internal::GradientIndex::assert_inactive();
      pack_contiguous_();
    }

    // Copy constructor: links to the source data rather than copying
    // it. This is needed because we want a function returning an
    // Array not to make a deep copy, but rather to perform a
    // (computationally cheaper) shallow copy; when the Array within
    // the function is destructed, it will remove its link to the
    // data, and the responsibility for deallocating the data will
    // then pass to the Array in the calling function.
    Array(Array& rhs)
      : internal::GradientIndex(rhs.gradient_index()),
        data_(rhs.data()), storage_(rhs.storage()),
        dimensions_(rhs.dimensions()), offset_(rhs.offset())
    {
      if (storage_) storage_->add_link();
#ifdef ADEPT_VERBOSE_FUNCTIONS
      std::cout << " running constructor Array(Array&)\n";
#endif
    }

    // Copy constructor with const argument does exactly the same
    // thing
    Array(const Array& rhs)
      : internal::GradientIndex(rhs.gradient_index()),
        dimensions_(rhs.dimensions()), offset_(rhs.offset())
    {
      // data_ and storage_ are set by link_ rather than in the
      // initializer list
      link_(const_cast(rhs));
#ifdef ADEPT_VERBOSE_FUNCTIONS
      std::cout << " running constructor Array(const Array&)\n";
#endif
    }

  private:
    // Point this array at the data and storage of rhs, adding a link
    // to the storage if there is one
    void link_(Array& rhs) {
      data_ = const_cast(rhs.data());
      storage_ = const_cast*>(rhs.storage());
      if (storage_) storage_->add_link();
    }

  public:

    // Initialize with an expression on the right hand side by
    // evaluating the expression, requiring the ranks to be equal.
// Note that this constructor enables expressions to be used as
// arguments to functions that expect an array - to prevent this
// implicit conversion, use the "explicit" keyword.
// NOTE(review): template parameter/argument lists throughout this
// section look as though they were lost before this text was
// captured (e.g. "template Array(const Expression& rhs",
// "static_cast&> (rhs)"). The code tokens are reproduced exactly as
// found - confirm against the upstream header before compiling.
template
Array(const Expression& rhs,
      typename internal::enable_if 0),int>::type = 0)
  : data_(0), storage_(0), dimensions_(0)
{
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running constructor Array(const Expression&), implemented by assignment\n";
#endif
  // Start empty, then let the assignment operator size and fill the array
  *this = rhs;
}

#ifdef ADEPT_CXX11_FEATURES
// Initialize from initializer list: start empty and delegate to the
// matching assignment operator
template
Array(std::initializer_list list) : data_(0), storage_(0), dimensions_(0) {
  *this = list;
}

// The unfortunate restrictions on initializer_list constructors
// mean that each possible Array rank needs explicit treatment
// (nesting depths of two to seven follow)
template
Array(std::initializer_list<
      std::initializer_list > list)
  : data_(0), storage_(0), dimensions_(0) { *this = list; }
template
Array(std::initializer_list<
      std::initializer_list<
      std::initializer_list > > list)
  : data_(0), storage_(0), dimensions_(0) { *this = list; }
template
Array(std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list > > > list)
  : data_(0), storage_(0), dimensions_(0) { *this = list; }
template
Array(std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list > > > > list)
  : data_(0), storage_(0), dimensions_(0) { *this = list; }
template
Array(std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list > > > > > list)
  : data_(0), storage_(0), dimensions_(0) { *this = list; }
template
Array(std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list<
      std::initializer_list > > > > > > list)
  : data_(0), storage_(0), dimensions_(0) { *this = list; }
#endif

// Destructor: if the data are stored in a Storage object then we
// tell it that one fewer object is linking to it; if the number
// of links to it drops to zero, it will destruct itself and
// deallocate the memory.
~Array()
{ if (storage_) storage_->remove_link(); }

// -------------------------------------------------------------------
// Array: 3. Assignment operators
// -------------------------------------------------------------------

// Assignment to another matrix: copy the data...
// Ideally we would like this to fall back to the operator=(const
// Expression&) function, but if we don't define a copy assignment
// operator then C++ will generate a default one :-(
Array& operator=(const Array& rhs) {
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running Array::operator=(const Array&), implemented with operator=(const Expression&)\n";
#endif
  // Cast to the expression base so the expression overload is selected
  return (*this = static_cast&> (rhs));
}

#ifdef ADEPT_MOVE_SEMANTICS
// Move assignment: swaps with rhs when that is safe (see below),
// otherwise falls back to an element-wise copy. Throws size_mismatch
// if a swap is permitted but the dimensions are incompatible.
Array& operator=(Array&& rhs) {
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running Array::operator=(Array&&)\n";
#endif
  // A fast "swap" operation can be performed only if the present
  // ("this") array is either empty, or its data is contained in a
  // Storage object with only one link to it (corresponding to the
  // present array). We may not perform a swap if its data is not
  // in a Storage object, since it might be linked to another
  // location that is expecting the result of the assignment to
  // change the data in that location. We also require that the
  // RHS data would otherwise be lost (but it is not clear that
  // this is necessary).
  if ((empty() || (storage_ && storage_->n_links() == 1))
      && (!rhs.storage() || rhs.storage()->n_links() == 1)) {
    // We still need to check that the dimensions match
    if (empty() || internal::compatible(dimensions_, rhs.dimensions())) {
      swap(*this, rhs);
    }
    else {
      std::string str = rhs.expression_string() + " assigned to " + expression_string_();
      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
    }
  }
  else {
    // Need a full copy because other arrays are linked to the
    // Storage object
    *this = static_cast&> (rhs);
  }
  return *this;
}

// Exchange the complete state of two arrays: data pointer, storage
// pointer, dimensions, offsets and (via swap_value on the base-class
// cast) the gradient-index value. No link counts are changed.
friend void swap(Array& l, Array& r) noexcept {
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running swap(Array&,Array&)\n";
#endif
  Type* tmp_data = l.data_;
  l.data_ = r.data_;
  r.data_ = tmp_data;
  Storage* tmp_storage = l.storage_;
  l.storage_ = r.storage_;
  r.storage_ = tmp_storage;
  swap(l.dimensions_, r.dimensions_);
  swap(l.offset_, r.offset_);
  static_cast&>(l).swap_value(static_cast&>(r));
}
#endif

// Assignment to an array expression of the same rank.
// An empty array is resized to the dimensions of rhs; a non-empty
// array must have compatible dimensions (checked unless
// ADEPT_NO_DIMENSION_CHECKING is defined), otherwise size_mismatch
// is thrown. Unless ADEPT_NO_ALIAS_CHECKING is defined, rhs is first
// evaluated into a temporary if it aliases this array's data range.
template
inline
//__attribute__((always_inline))
typename internal::enable_if::type
operator=(const Expression& __restrict rhs) {
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running Array::operator=(const Expression&)\n";
#endif
#ifndef ADEPT_NO_DIMENSION_CHECKING
  ExpressionSize dims;
  if (!rhs.get_dimensions(dims)) {
    std::string str = "Array size mismatch in "
      + rhs.expression_string() + ".";
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  else if (empty()) {
    resize(dims);
  }
  else if (!internal::compatible(dims, dimensions_)) {
    std::string str = "Expr";
    str += dims.str() + " object assigned to " + expression_string_();
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
#else
  // Without dimension checking the dimensions of rhs are only
  // queried when this array needs to be sized
  if (empty()) {
    ExpressionSize dims;
    if (!rhs.get_dimensions(dims)) {
      std::string str = "Array size mismatch in "
        + rhs.expression_string() + ".";
      throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
    }
    resize(dims);
  }
#endif
  // Still empty here means rhs was empty too: nothing to assign
  if (!empty()) {
#ifndef ADEPT_NO_ALIAS_CHECKING
    // Check for aliasing first
    Type const * ptr_begin;
    Type const * ptr_end;
    data_range(ptr_begin, ptr_end);
    if (rhs.is_aliased(ptr_begin, ptr_end)) {
      Array copy;
      // It would be nice to wrap noalias around rhs, but then
      // this leads to infinite template recursion since the "="
      // operator calls the current function but with a modified
      // expression type. perhaps a better way would be to make
      // copy.assign_no_alias(rhs) work.
      copy = rhs;
      assign_expression_(copy);
    }
    else {
#endif
      // Select active/passive version by delegating to a
      // protected function
      // The cast() is needed because assign_expression_ accepts
      // its argument by value
      assign_expression_(rhs.cast());
#ifndef ADEPT_NO_ALIAS_CHECKING
    }
#endif
  }
  return *this;
}

// Assignment to an array expression of the same rank in which the
// activeness of the right-hand-side is ignored. Dimension and alias
// checks are always performed here (they are not guarded by the
// ADEPT_NO_* macros); size_mismatch is thrown on incompatible sizes.
template
typename internal::enable_if::type
assign_inactive(const Expression& rhs) {
  ExpressionSize dims;
  if (!rhs.get_dimensions(dims)) {
    std::string str = "Array size mismatch in "
      + rhs.expression_string() + ".";
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  else if (empty()) {
    resize(dims);
  }
  else if (!internal::compatible(dims, dimensions_)) {
    std::string str = "Expr";
    str += dims.str() + " object assigned to " + expression_string_();
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  if (!empty()) {
    // Check for aliasing first
    Type const * ptr_begin;
    Type const * ptr_end;
    data_range(ptr_begin, ptr_end);
    if (rhs.is_aliased(ptr_begin, ptr_end)) {
      // Evaluate into a temporary before overwriting our own data
      Array copy;
      copy.assign_inactive(rhs);
      // *this = copy;
      assign_expression_(copy);
    }
    else {
      assign_expression_(rhs.cast());
    }
  }
  return *this;
}

// Assignment to a single value copies to every element; an empty
// array is left untouched.
// NOTE(review): the original line breaks around the "FIX" comment
// were lost; this layout assumes only the "|| internal::is_active..."
// clause is commented out - confirm.
template
typename internal::enable_if::value
// FIX || internal::is_active::value
, Array&>::type
operator=(RType rhs) {
  if (!empty()) {
    assign_inactive_scalar_(rhs);
  }
  return *this;
}

// Assign active scalar expression to an active array by first
// converting the RHS to an active scalar
template
typename internal::enable_if 0) && IsActive && !E::is_lvalue, Array&>::type
operator=(const Expression& rhs) {
  Active x = rhs;
  *this = x;
  return *this;
}

// Assign an active scalar to an active array: every element receives
// the scalar's value and, for each element, one operation with
// multiplier 1.0 on the scalar's gradient index is pushed onto the
// active stack.
// NOTE(review): the original line breaks around the "FIX" and
// "Array&" comments were lost; this layout assumes the enable_if
// return type is live code and the plain "Array&" alternative is
// commented out - confirm.
template
// FIX
typename internal::enable_if::value && IsActive, Array&>::type
// Array&
operator=(const Active& rhs) {
  ADEPT_STATIC_ASSERT(IsActive, ATTEMPT_TO_ASSIGN_ACTIVE_SCALAR_TO_INACTIVE_ARRAY);
  if (!empty()) {
#ifdef ADEPT_RECORDING_PAUSABLE
    // When recording is paused only the values are copied
    if (!ADEPT_ACTIVE_STACK->is_recording()) {
      assign_inactive_scalar_(rhs.scalar_value());
      return *this;
    }
#endif
    ExpressionSize i(0);
    Index index = 0;
    int my_rank;
    static const int last = Rank-1;
    // In case PType != Type we make a local copy to minimize type
    // conversions
    Type val = rhs.scalar_value();
    ADEPT_ACTIVE_STACK->check_space(size());
    do {
      i[last] = 0;
      // Innermost loop
      for ( ; i[last] < dimensions_[last]; ++i[last],
              index += offset_[last]) {
        data_[index] = val;
        ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index());
        ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index);
      }
      // Step the outer indices; the loop ends once my_rank is negative
      advance_index(index, my_rank, i);
    } while (my_rank >= 0);
  }
  return *this;
}

// Compound assignment operators: each one is implemented as
// "*this = noalias(*this OP rhs)"
#define ADEPT_DEFINE_OPERATOR(OPERATOR, OPSYMBOL) \
template \
Array& OPERATOR(const RType& rhs) { \
  return *this = noalias(*this OPSYMBOL rhs); \
}
ADEPT_DEFINE_OPERATOR(operator+=, +)
ADEPT_DEFINE_OPERATOR(operator-=, -)
ADEPT_DEFINE_OPERATOR(operator*=, *)
ADEPT_DEFINE_OPERATOR(operator/=, /)
// ADEPT_DEFINE_OPERATOR(operator&=, &);
// ADEPT_DEFINE_OPERATOR(operator|=, |);
#undef ADEPT_DEFINE_OPERATOR

// Enable the A.where(B) = C construct.
// Firstly implement the A.where(B) to return a "Where" object.
// Unless ADEPT_NO_DIMENSION_CHECKING is defined, size_mismatch is
// thrown when the dimensions of the boolean expression cannot be
// obtained or differ from those of this array.
// NOTE(review): template parameter/argument lists throughout this
// section look as though they were lost before this text was
// captured (e.g. "internal::enable_if >::type",
// "static_cast(dimensions_[0])"). The code tokens are reproduced
// exactly as found - confirm against the upstream header before
// compiling.
template
typename internal::enable_if >::type
where(const Expression& bool_expr) {
#ifndef ADEPT_NO_DIMENSION_CHECKING
  ExpressionSize dims;
  if (!bool_expr.get_dimensions(dims)) {
    std::string str = "Array size mismatch in "
      + bool_expr.expression_string() + ".";
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  else if (dims != dimensions_) {
    throw size_mismatch("Boolean expression of different size"
                        ADEPT_EXCEPTION_LOCATION);
  }
#endif
  return internal::Where(*this, bool_expr.cast());
}

// When Where = C is invoked, it calls
// A.assign_conditional(B,C). This is implemented separately for
// the case when C is an inactive scalar and when it is an array
// expression.

// Scalar right-hand side: nothing is done if this array is empty
template
typename internal::enable_if::value, void>::type
assign_conditional(const Expression& bool_expr, C rhs) {
  if (!empty()) {
    assign_conditional_inactive_scalar_(bool_expr, rhs);
  }
}

// Expression right-hand side: rhs must have dimensions compatible
// with this array (size_mismatch otherwise, unless
// ADEPT_NO_DIMENSION_CHECKING is defined); if rhs aliases this
// array's data range it is evaluated into a temporary first.
template
void assign_conditional(const Expression& bool_expr,
                        const Expression& rhs) {
  // Assume size of bool_expr already checked
#ifndef ADEPT_NO_DIMENSION_CHECKING
  ExpressionSize dims;
  if (!rhs.get_dimensions(dims)) {
    std::string str = "Array size mismatch in "
      + rhs.expression_string() + ".";
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  else if (!internal::compatible(dims,dimensions_)) {
    throw size_mismatch("Right-hand-side of \"where\" construct of incompatible size"
                        ADEPT_EXCEPTION_LOCATION);
  }
#endif
  // Check for aliasing first
  Type const * ptr_begin;
  Type const * ptr_end;
  data_range(ptr_begin, ptr_end);
  if (rhs.is_aliased(ptr_begin, ptr_end)) {
    Array copy;
    copy = rhs;
    assign_conditional_(bool_expr.cast(), copy);
  }
  else {
    // Select active/passive version by delegating to a
    // protected function
    assign_conditional_(bool_expr.cast(), rhs.cast());
  }
  // return *this;
}

#ifdef ADEPT_CXX11_FEATURES
// Assignment of an Array to an initializer list; the first ought
// to only work for Vectors. An empty array is resized to the length
// of the list; a list longer than the array throws size_mismatch. A
// shorter list leaves the remaining elements at zero because the
// whole array is zeroed before the copy.
template
typename internal::enable_if::value,
                             Array&>::type
operator=(std::initializer_list list) {
  ADEPT_STATIC_ASSERT(Rank==1,RANK_MISMATCH_IN_INITIALIZER_LIST);
  if (empty()) {
    resize(list.size());
  }
  else if (list.size() > static_cast(dimensions_[0])) {
    throw size_mismatch("Initializer list is larger than Vector in assignment"
                        ADEPT_EXCEPTION_LOCATION);
  }
  // Zero the whole array first in order that automatic
  // differentiation works
  *this = 0;
  Index index = 0;
  for (auto i = std::begin(list); i < std::end(list); ++i,
         ++index) {
    // Write straight to the data using the stride of dimension 0
    data_[index*offset_[0]] = *i;
  }
  return *this;
}

// Assignment of a higher rank Array to a list of lists...
// An empty array takes its shape from shape_initializer_list_();
// otherwise only the length of the outermost list is checked here.
// Each inner list is assigned to (*this)[index].
template
Array& operator=(std::initializer_list > list) {
  ADEPT_STATIC_ASSERT(Rank==internal::initializer_list_rank::value+2,
                      RANK_MISMATCH_IN_INITIALIZER_LIST);
  if (empty()) {
    Index dims[ADEPT_MAX_ARRAY_DIMENSIONS];
    int ndims = 0;
    shape_initializer_list_(list, dims, ndims);
    resize(dims);
  }
  else if (list.size() > static_cast(dimensions_[0])) {
    throw size_mismatch("Multi-dimensional initializer list larger than slowest-varying dimension of Array"
                        ADEPT_EXCEPTION_LOCATION);
  }
  Index index = 0;
  for (auto i = std::begin(list); i < std::end(list); ++i,
         ++index) {
    (*this)[index] = *i;
  }
  return *this;
}

protected:
// Innermost level of the shape walk: record the length of this list
// in dims[ndims] and advance ndims
template
typename internal::enable_if::value>::type
shape_initializer_list_(std::initializer_list list,
                        Index* dims, int& ndims) const {
  dims[ndims] = list.size();
  ndims++;
}

// Nested level of the shape walk: record the length of this list,
// then recurse into its first element only, so the shape is taken
// from the first sub-list at each depth.
// NOTE(review): list.begin() is dereferenced without checking that
// the list is non-empty - confirm callers never pass an empty list.
template
void
shape_initializer_list_(std::initializer_list > list,
                        Index* dims, int& ndims) const {
  dims[ndims] = list.size();
  ndims++;
  shape_initializer_list_(*(list.begin()), dims, ndims);
}

public:
#endif

// -------------------------------------------------------------------
// Array: 4. Access functions, particularly operator()
// -------------------------------------------------------------------

// Get l-value of the element at the specified coordinates; the
// coordinates are converted to a data offset with index_()
typename internal::active_reference::type
get_lvalue(const ExpressionSize& i) {
  return get_lvalue_(index_(i));
}

// Get r-value of the element at the specified coordinates
typename internal::active_scalar::type
get_rvalue(const ExpressionSize& i) const {
  return get_rvalue_(index_(i));
}

protected:
// Helpers for get_lvalue/get_rvalue taking a data offset "loc". The
// overloads returning ActiveReference/Active pair the element with
// gradient index gradient_index()+loc; the others return the element
// itself.
template
typename internal::enable_if >::type
get_lvalue_(const Index& loc) {
  return ActiveReference(data_[loc], gradient_index()+loc);
}
template
typename internal::enable_if::type
get_lvalue_(const Index& loc) {
  return data_[loc];
}

template
typename internal::enable_if >::type
get_rvalue_(const Index& loc) const {
  return Active(data_[loc], gradient_index()+loc);
}
template
typename internal::enable_if::type
get_rvalue_(const Index& loc) const {
  return data_[loc];
}

public:
// Get a constant reference to the element at the specified
// location, ignoring whether it is active or not
// const Type& get(const ExpressionSize& i) const {
//   return data_[index_(i)];
// }

// The following provide a way to access individual elements of
// the array. There must be the same number of arguments to
// operator() as the rank of the array. Each argument must be of
// integer type, or a rank-0 expression of integer type (such as
// "end" or "end-3"). Inactive arrays return a reference to the
// element, while active arrays return an ActiveReference
// object. Up to 7 dimensions are supported.
// NOTE(review): template parameter/argument lists throughout this
// section look as though they were lost before this text was
// captured (e.g. "template typename internal::enable_if::value",
// "get_scalar_reference("). The code tokens are reproduced exactly
// as found - confirm against the upstream header before compiling.
//
// In every accessor below, each index argument is passed through
// internal::get_index_with_len() together with the length of its
// dimension, multiplied by the stride (offset_) of that dimension,
// and the products are summed to give the position in data_.

// l-value access to inactive array with function-call operator
template
typename internal::enable_if::value && !IsActive, Type&>::type
operator()(I0 i0) {
  return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]];
}

// r-value access to inactive array with function-call operator
template
typename internal::enable_if::value && !IsActive, const Type&>::type
operator()(I0 i0) const {
  return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]];
}

// l-value access to inactive array with element-access operator
template
typename internal::enable_if::value && !IsActive, Type&>::type
operator[](I0 i0) {
  return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]];
}

// r-value access to inactive array with element-access operator
template
typename internal::enable_if::value && !IsActive, const Type&>::type
operator[](I0 i0) const {
  return data_[internal::get_index_with_len(i0,dimensions_[0])*offset_[0]];
}

protected:
// Return the element at data offset "offset": the first two
// overloads return the element itself, the last two wrap it in an
// ActiveReference / ActiveConstReference carrying gradient index
// gradient_index()+offset. Used by the 2D to 7D operator() below.
template
typename internal::enable_if::type
get_scalar_reference(const Index& offset) {
  return data_[offset];
}
template
typename internal::enable_if::type
get_scalar_reference(const Index& offset) const {
  return data_[offset];
}
template
typename internal::enable_if >::type
get_scalar_reference(const Index& offset) {
  return ActiveReference(data_[offset], gradient_index()+offset);
}
template
typename internal::enable_if >::type
get_scalar_reference(const Index& offset) const {
  return ActiveConstReference(data_[offset], gradient_index()+offset);
}

public:
// l-value access to active array with function-call operator
template
typename internal::enable_if::value && IsActive,
                             ActiveReference >::type
operator()(I0 i0) {
  Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];
  return ActiveReference(data_[offset], gradient_index()+offset);
}

// r-value access to active array with function-call operator
template
typename internal::enable_if::value && IsActive,
                             ActiveConstReference >::type
operator()(I0 i0) const {
  Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];
  return ActiveConstReference(data_[offset], gradient_index()+offset);
}

// l-value access to active array with element-access operator
template
typename internal::enable_if::value && IsActive,
                             ActiveReference >::type
operator[](I0 i0) {
  Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];
  return ActiveReference(data_[offset], gradient_index()+offset);
}

// r-value access to active array with element-access operator
template
typename internal::enable_if::value && IsActive,
                             ActiveConstReference >::type
operator[](I0 i0) const {
  Index offset = internal::get_index_with_len(i0,dimensions_[0])*offset_[0];
  return ActiveConstReference(data_[offset], gradient_index()+offset);
}

// 2D array l-value and r-value access
template
typename internal::enable_if::value,
                             typename internal::active_reference::type>::type
operator()(I0 i0, I1 i1) {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]);
}
template
typename internal::enable_if::value,
                             typename internal::active_const_reference::type>::type
operator()(I0 i0, I1 i1) const {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]);
}

// 3D array l-value and r-value access
template
typename internal::enable_if::value,
                             typename internal::active_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2) {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]);
}
template
typename internal::enable_if::value,
                             typename internal::active_const_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2) const {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]);
}

// 4D array l-value and r-value access
template
typename internal::enable_if::value,
                             typename internal::active_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3) {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]);
}
template
typename internal::enable_if::value,
                             typename internal::active_const_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3) const {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]);
}

// 5D array l-value and r-value access
template
typename internal::enable_if::value,
                             typename internal::active_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]
    + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]);
}
template
typename internal::enable_if::value,
                             typename internal::active_const_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]
    + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]);
}

// 6D array l-value and r-value access
template
typename internal::enable_if::value,
                             typename internal::active_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]
    + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]
    + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]);
}
template
typename internal::enable_if::value,
                             typename internal::active_const_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]
    + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]
    + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]);
}

// 7D array l-value and r-value access
template
typename internal::enable_if::value,
                             typename internal::active_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]
    + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]
    + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]
    + internal::get_index_with_len(i6,dimensions_[6])*offset_[6]);
}
template
typename internal::enable_if::value,
                             typename internal::active_const_reference::type>::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const {
  return get_scalar_reference(
      internal::get_index_with_len(i0,dimensions_[0])*offset_[0]
    + internal::get_index_with_len(i1,dimensions_[1])*offset_[1]
    + internal::get_index_with_len(i2,dimensions_[2])*offset_[2]
    + internal::get_index_with_len(i3,dimensions_[3])*offset_[3]
    + internal::get_index_with_len(i4,dimensions_[4])*offset_[4]
    + internal::get_index_with_len(i5,dimensions_[5])*offset_[5]
    + internal::get_index_with_len(i6,dimensions_[6])*offset_[6]);
}

// The following define the case when operator() is called and one
// of the arguments is a "range" object (an object that describes
// a range of indices that are either contiguous or separated by a
// fixed stride), while all others are of integer type (or a
// rank-0 expression of integer type). An array object is returned
// with a rank that may be reduced from that of the original
// array, by one for each dimension that was indexed by an
// integer. The new array points to a subset of the original data,
// so modifying it will modify the original array.
// First the case of a vector where we know the argument must be a
// "range" object. The length of the slice is
// (end + stride - begin) / stride, its stride is the range stride
// times the stride of this array, and its data pointer starts at
// element "begin". The slice shares storage_ with this array.
// NOTE(review): template parameter/argument lists throughout this
// section look as though they were lost before this text was
// captured (e.g. "ExpressionSize new_dim;", "return Array(data_ +
// ibegin"). The code tokens are reproduced exactly as found -
// confirm against the upstream header before compiling.
template
typename internal::enable_if::value,
                             Array<1,Type,IsActive> >::type
operator()(I0 i0) {
  ExpressionSize<1> new_dim((i0.end(dimensions_[0])
                             + i0.stride(dimensions_[0])
                             -i0.begin(dimensions_[0]))
                            /i0.stride(dimensions_[0]));
  ExpressionSize<1> new_offset(i0.stride(dimensions_[0])*offset_[0]);
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running Array::operator()(RANGED)\n";
#endif
  return Array<1,Type,IsActive>(data_ + i0.begin(dimensions_[0])*offset_[0],
                                storage_, new_dim, new_offset);
}

// As above for a const array; the returned slice is a const Array
template
typename internal::enable_if::value,
                             const Array<1,Type,IsActive> >::type
operator()(I0 i0) const {
  ExpressionSize<1> new_dim((i0.end(dimensions_[0])
                             + i0.stride(dimensions_[0])
                             -i0.begin(dimensions_[0]))
                            /i0.stride(dimensions_[0]));
  ExpressionSize<1> new_offset(i0.stride(dimensions_[0])*offset_[0]);
#ifdef ADEPT_VERBOSE_FUNCTIONS
  std::cout << " running Array::operator()(RANGED) const\n";
#endif
  return Array<1,Type,IsActive>(data_ + i0.begin(dimensions_[0])*offset_[0],
                                storage_, new_dim, new_offset);
}

private:
// For multi-dimensional arrays, we need a helper function

// Treat the indexing of dimension "irank" in the case that the
// index is of integer type: only the start offset "ibegin" is
// advanced; inew_rank, new_dim and new_offset are left untouched, so
// this dimension does not appear in the slice.
template
typename internal::enable_if::value, void>::type
update_index(const Index& irank, const T& i, Index& inew_rank, Index& ibegin,
             ExpressionSize& new_dim,
             ExpressionSize& new_offset) const {
  ibegin += internal::get_index_with_len(i,dimensions_[irank])*offset_[irank];
}

// Treat the indexing of dimension "irank" in the case that the
// index is a "range" object: the start offset is advanced to the
// first element of the range, the length and stride of the range are
// stored as dimension "inew_rank" of the slice, and inew_rank is
// incremented.
template
typename internal::enable_if::value, void>::type
update_index(const Index& irank, const T& i, Index& inew_rank, Index& ibegin,
             ExpressionSize& new_dim,
             ExpressionSize& new_offset) const {
  ibegin += i.begin(dimensions_[irank])*offset_[irank];
  new_dim[inew_rank]
    = (i.end(dimensions_[irank])
       + i.stride(dimensions_[irank])-i.begin(dimensions_[irank]))
    / i.stride(dimensions_[irank]);
  new_offset[inew_rank] = i.stride(dimensions_[irank])*offset_[irank];
  ++inew_rank;
}

public:
// Now the individual overloads for each number of arguments, up
// to 7, with separate r-value (const) and l-value (non-const)
// versions. Each one calls update_index() once per argument, in
// dimension order, to accumulate the start offset and the dimensions
// and strides of the slice, then returns an Array that points into
// this array's data and shares storage_.

// Two arguments
template
typename internal::enable_if::value,
                             Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1) {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}
template
typename internal::enable_if::value,
                             const Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1) const {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Three arguments
template
typename internal::enable_if::value,
                             Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2) {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}
template
typename internal::enable_if::value,
                             const Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2) const {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Four arguments
template
typename internal::enable_if::value,
                             Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3) {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}
template
typename internal::enable_if::value,
                             const Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3) const {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Five arguments (non-const version)
template
typename internal::enable_if::value,
                             Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}
// Const (r-value) overload for five indices. As in every slicing
// overload here, update_index (defined above) is called once per
// dimension with the running inew_rank/ibegin/new_dim/new_offset,
// and the result is an Array starting at data_ + ibegin that
// shares storage_ with this array.
template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Six indices, non-const (l-value) version
template typename internal::enable_if::value, Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);
  update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Six indices, const (r-value) version
template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);
  update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Seven indices, non-const (l-value) version
template typename internal::enable_if::value, Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);
  update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);
  update_index(6, i6, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// Seven indices, const (r-value) version
template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const {
  static const int new_rank = internal::is_ranged::count;
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  Index inew_rank = 0;
  Index ibegin = 0;
  update_index(0, i0, inew_rank, ibegin, new_dim, new_offset);
  update_index(1, i1, inew_rank, ibegin, new_dim, new_offset);
  update_index(2, i2, inew_rank, ibegin, new_dim, new_offset);
  update_index(3, i3, inew_rank, ibegin, new_dim, new_offset);
  update_index(4, i4, inew_rank, ibegin, new_dim, new_offset);
  update_index(5, i5, inew_rank, ibegin, new_dim, new_offset);
  update_index(6, i6, inew_rank, ibegin, new_dim, new_offset);
  return Array(data_ + ibegin, storage_, new_dim, new_offset);
}

// If one or more of the indices is not guaranteed to be monotonic
// at compile time then we must return an IndexedArray, now done
// for all possible numbers of arguments. Each non-const overload
// passes *this and the indices to the IndexedArray constructor;
// each const overload does the same after casting away the
// constness of this.

// Indexing a 1D array
template typename internal::enable_if::value && !internal::is_ranged::value, internal::IndexedArray >::type
operator()(const I0& i0) {
  return internal::IndexedArray(*this, i0);
}
template typename internal::enable_if::value && !internal::is_ranged::value, const internal::IndexedArray >::type
operator()(const I0& i0) const {
  return internal::IndexedArray(*const_cast(this), i0);
}

// Indexing a 2D array
template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,Array,I0,I1> >::type
operator()(const I0& i0, const I1& i1) {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*this, i0, i1);
}
template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,Array,I0,I1> >::type
operator()(const I0& i0, const I1& i1) const {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*const_cast(this), i0, i1);
}

// Indexing a 3D array
template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2> >::type
operator()(const I0& i0, const I1& i1, const I2& i2) {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*this, i0, i1, i2);
}
template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2> >::type
operator()(const I0& i0, const I1& i1, const I2& i2) const {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*const_cast(this), i0, i1, i2);
}

// Indexing a 4D array
template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*this, i0, i1, i2, i3);
}
template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*const_cast(this), i0, i1, i2, i3);
}

// Indexing a 5D array
template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3,I4> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4) {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*this, i0, i1, i2, i3, i4);
}
template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3,I4> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4) const {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*const_cast(this), i0, i1, i2, i3, i4);
}

// Indexing a 6D array
template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3,I4,I5> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5) {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*this,i0,i1,i2,i3,i4,i5);
}
template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3,I4,I5> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5) const {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*const_cast(this),i0,i1,i2,i3,i4,i5);
}

// Indexing a 7D array
template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3,I4,I5,I6> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6) {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*this,i0,i1,i2,i3,i4,i5,i6);
}
template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,Array,I0,I1,I2,I3,I4,I5,I6> >::type
operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6) const {
  static const int new_rank = internal::is_irreg_indexed::count;
  return internal::IndexedArray(*const_cast(this),i0,i1,i2,i3,i4,i5,i6);
}

// Provide a C-array-like array access: for a multidimensional
// array, operator[](i), where i is of integer type, returns an
// array of rank one less than the original array, where the new
// array is "sliced" at index i of dimension 0. For a vector,
// operator[](i) returns an l-value to the element at i. Thus for
// a 3D array A, A[1][2][3] returns a single element. Note that
// this will be slower than A(1,2,3) because each operator[]
// creates a new array (although does not copy the data).
template typename internal::enable_if::value && (Rank > 1), Array >::type
operator[](T i) {
  // Position of the start of slice i along dimension 0
  int index = internal::get_index_with_len(i,dimensions_[0])*offset_[0];
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  // Dimensions 1..Rank-1 become dimensions 0..Rank-2 of the result
  for (int j = 1; j < Rank; ++j) {
    new_dim[j-1] = dimensions_[j];
    new_offset[j-1] = offset_[j];
  }
  return Array(data_ + index, storage_, new_dim, new_offset);
}

// The const version, alas, throws away the constness because we
// don't have a way of returning an unmodifiable array
template typename internal::enable_if::value && (Rank > 1), Array >::type
operator[](T i) const {
  int index = internal::get_index_with_len(i,dimensions_[0])*offset_[0];
  ExpressionSize new_dim;
  ExpressionSize new_offset;
  for (int j = 1; j < Rank; ++j) {
    new_dim[j-1] = dimensions_[j];
    new_offset[j-1] = offset_[j];
  }
  return Array(const_cast(data_) + index, storage_, new_dim, new_offset);
}

// diag_matrix(), where *this is a 1D array, returns a DiagMatrix
// containing the data as the diagonal pointing to the original
// data. Can be used as an lvalue.
SpecialMatrix, IsActive> diag_matrix();

// diag_vector(offdiag), where *this is a square matrix, returns a
// rank-1 Array that shares data_ and storage_ with this matrix and
// steps by offset_[0]+offset_[1], i.e. along a diagonal. A
// non-negative offdiag moves the start offdiag elements along
// dimension 1; a negative offdiag moves it -offdiag elements along
// dimension 0. An empty matrix yields an empty vector and a
// non-square matrix throws invalid_operation.
// NOTE(review): offdiag is not range-checked here, so a magnitude
// of dimensions_[0] or more gives a non-positive length - confirm
// that callers never pass such a value.
Array<1,Type,IsActive> diag_vector(Index offdiag = 0) {
  ADEPT_STATIC_ASSERT(Rank == 2, DIAG_VECTOR_ONLY_WORKS_ON_SQUARE_MATRICES);
  if (empty()) {
    // Return an empty vector
    return Array<1,Type,IsActive>();
  }
  else if (dimensions_[0] != dimensions_[1]) {
    throw invalid_operation("diag_vector member function only applicable to square matrices" ADEPT_EXCEPTION_LOCATION);
  }
  else if (offdiag >= 0) {
    // Diagonal on or above the main one: shortened by offdiag
    Index new_dim = std::min(dimensions_[0], dimensions_[1]-offdiag);
    return Array<1,Type,IsActive>(data_+offset_[1]*offdiag, storage_, ExpressionSize<1>(new_dim), ExpressionSize<1>(offset_[0]+offset_[1]));
  }
  else {
    // Diagonal below the main one: offdiag is negative, so the
    // length shrinks and the start pointer moves forwards
    Index new_dim = std::min(dimensions_[0]+offdiag, dimensions_[1]);
    return Array<1,Type,IsActive>(data_-offset_[0]*offdiag, storage_, ExpressionSize<1>(new_dim), ExpressionSize<1>(offset_[0]+offset_[1]));
  }
}

// submatrix_on_diagonal(ibegin, iend), where *this is a square
// matrix, returns an Array linked to the same data that covers
// rows and columns ibegin to iend inclusive; the offsets are
// unchanged. Throws invalid_operation if the matrix is not square
// and index_out_of_bounds unless 0 <= ibegin <= iend < dimension.
Array submatrix_on_diagonal(Index ibegin, Index iend) {
  ADEPT_STATIC_ASSERT(Rank == 2, SUBMATRIX_ON_DIAGONAL_ONLY_WORKS_ON_SQUARE_MATRICES);
  if (dimensions_[0] != dimensions_[1]) {
    throw invalid_operation("submatrix_on_diagonal member function only applicable to square matrices" ADEPT_EXCEPTION_LOCATION);
  }
  else if (ibegin < 0 || ibegin > iend || iend >= dimensions_[0]) {
    throw index_out_of_bounds("Dimensions out of range in submatrix_on_diagonal" ADEPT_EXCEPTION_LOCATION);
  }
  else {
    Index len = iend-ibegin+1;
    ExpressionSize<2> dim(len,len);
    // Element (ibegin,ibegin) is the first element of the result
    return Array(data_+ibegin*(offset_[0]+offset_[1]), storage_, dim, offset_);
  }
}

// For extracting contiguous sections out of an array use the
// following. Currently this just indexes each dimension with the
// contiguous range(a,b) index, but in future it may be optimized.
// 1D array subset: forwards to operator() with range(ibegin0,iend0).
// Every subset overload below has a non-const version followed by
// a const one, statically asserts that Rank matches the number of
// begin/end pairs, and forwards one range(begin,end) per dimension.
template Array subset(const B0& ibegin0, const E0& iend0) {
  ADEPT_STATIC_ASSERT(Rank == 1, SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY);
  return (*this)(range(ibegin0,iend0));
}
template const Array subset(const B0& ibegin0, const E0& iend0) const {
  ADEPT_STATIC_ASSERT(Rank == 1, SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY);
  return (*this)(range(ibegin0,iend0));
}

// 2D array subset
template Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1) {
  ADEPT_STATIC_ASSERT(Rank == 2, SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1));
}
template const Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1) const {
  ADEPT_STATIC_ASSERT(Rank == 2, SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1));
}

// 3D array subset
template Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2) {
  ADEPT_STATIC_ASSERT(Rank == 3, SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2));
}
template const Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2) const {
  ADEPT_STATIC_ASSERT(Rank == 3, SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2));
}

// 4D array subset
template Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3) {
  ADEPT_STATIC_ASSERT(Rank == 4, SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3));
}
template const Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3) const {
  ADEPT_STATIC_ASSERT(Rank == 4, SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3));
}

// 5D array subset
template Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4) {
  ADEPT_STATIC_ASSERT(Rank == 5, SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4));
}
template const Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4) const {
  ADEPT_STATIC_ASSERT(Rank == 5, SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4));
}

// 6D array subset
template Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5) {
  ADEPT_STATIC_ASSERT(Rank == 6, SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5));
}
template const Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5) const {
  ADEPT_STATIC_ASSERT(Rank == 6, SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5));
}

// 7D array
// subset
template Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5, const B6& ibegin6, const E6& iend6) {
  ADEPT_STATIC_ASSERT(Rank == 7, SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5), range(ibegin6,iend6));
}
template const Array subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5, const B6& ibegin6, const E6& iend6) const {
  ADEPT_STATIC_ASSERT(Rank == 7, SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY);
  return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5), range(ibegin6,iend6));
}

// -------------------------------------------------------------------
// Array: 5. Public member functions
// -------------------------------------------------------------------

// Link to an existing array of the same rank, type and activeness.
// Throws empty_array if rhs has no data; otherwise this array is
// cleared, takes over rhs's data pointer, storage, dimensions and
// offsets, and registers one more link on the storage object (if
// there is one). Returns *this.
Array& link(Array& rhs) {
  if (!rhs.data()) {
    throw empty_array("Attempt to link to empty array" ADEPT_EXCEPTION_LOCATION);
  }
  else {
    // Drop whatever this array pointed to before
    clear();
    data_ = rhs.data();
    storage_ = rhs.storage();
    dimensions_.copy(rhs.dimensions());
    offset_.copy(rhs.offset());
    if (storage_) {
      storage_->add_link();
    }
    if (IsActive) {
      internal::GradientIndex::set(data_, storage_);
    }
  }
  return *this;
}

// Fortran-like link syntax A >>= B
Array& operator>>=(Array& rhs) {
  return link(rhs);
}

#ifndef ADEPT_MOVE_SEMANTICS
// A common pattern is to link to a subset of another Array,
// e.g. vec1.link(vec2(range(2,4))), but the problem is that the
// argument to link is a temporary so will not bind to Array&.
In // C++98 we therefore need a function taking const Array& and then // cast away the const-ness. This has the unfortunate side effect // that a non-const Array can be linked to a const Array. Array& link(const Array& rhs) { return link(const_cast(rhs)); } Array& operator>>=(const Array& rhs) { return link(const_cast(rhs)); } #else // But in C++11 we can solve this problem and only bind to // temporary non-const Arrays Array& link(Array&& rhs) { return link(const_cast(rhs)); } Array& operator>>=(Array&& rhs) { return link(const_cast(rhs)); } #endif // To prevent linking to an rvalue expression we write a templated // function that will fail to compile template typename internal::enable_if::type link(const Expression&) { ADEPT_STATIC_ASSERT(E::is_lvalue, CAN_ONLY_LINK_TO_AN_LVALUE_EXPRESSION); } template typename internal::enable_if::type operator>>=(const Expression&) { ADEPT_STATIC_ASSERT(E::is_lvalue, CAN_ONLY_LINK_TO_AN_LVALUE_EXPRESSION); } // STL-like size() returns total length of array Index size() const { Index s = 1; for (int i = 0; i < Rank; ++i) { s *= dimensions_[i]; } return s; } // Return constant reference to dimensions const ExpressionSize& dimensions() const { return dimensions_; } bool get_dimensions_(ExpressionSize& dim) const { dim = dimensions_; return true; } // Return individual dimension - probably deprecate "dimension" in // favour of "size" Index dimension(int j) const { return dimensions_[j]; } Index size(int j) const { return dimensions_[j]; } // Return individual offset Index offset(int j) const { return offset_[j]; } // Return constant reference to offsets const ExpressionSize& offset() const { return offset_; } const Index& last_offset() const { return offset_[Rank-1]; } // Return true if the array is empty bool empty() const { return (dimensions_[0] == 0); } // Return a string describing the array std::string info_string() const { std::stringstream str; str << "Array<" << Rank << ">, dim=" << dimensions_ << ", offset=" << offset_ << 
", data_location=" << data_; if (IsActive) { str << ", gradient_index=" << gradient_index(); } return str.str(); } // Return a pointer to the start of the data Type* data() { return data_; } const Type* data() const { return data_; } const Type* const_data() const { return data_; } // Older style Type* data_pointer() { return data_; } const Type* data_pointer() const { return data_; } const Type* const_data_pointer() const { return data_; } // For vectors only, we allow a pointer to be returned to a // specified element Type* data_pointer(Index i) { ADEPT_STATIC_ASSERT(Rank == 1, CAN_ONLY_USE_DATA_POINTER_WITH_INDEX_ON_VECTORS); if (data_) { return data_ + offset_[0]*i; } else { return 0; } } const Type* const_data_pointer(Index i) const { ADEPT_STATIC_ASSERT(Rank == 1, CAN_ONLY_USE_CONST_DATA_POINTER_WITH_INDEX_ON_VECTORS); if (data_) { return data_ + offset_[0]*i; } else { return 0; } } // Return a pointer to the storage object Storage* storage() { return storage_; } // Reset the array to its original empty state, removing the link // to the data (which may deallocate the data if it was the only // link) and set the dimensions to zero void clear() { if (storage_) { storage_->remove_link(); storage_ = 0; } data_ = 0; dimensions_.set_all(0); offset_.set_all(0); internal::GradientIndex::clear(); } // Resize an array void resize(const Index* dim, bool force_contiguous = false) { ADEPT_STATIC_ASSERT(!(std::numeric_limits::is_integer && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS); if (storage_) { storage_->remove_link(); storage_ = 0; } // Check requested dimensions for (int i = 0; i < Rank; ++i) { if (dim[i] < 0) { throw invalid_dimension("Negative array dimension requested" ADEPT_EXCEPTION_LOCATION); } else if (dim[i] == 0) { // If any of the dimensions is zero, we clear the array // completely and all dimensions will be zero clear(); return; } } dimensions_.copy(dim); // Copy dimensions if (force_contiguous) { pack_contiguous_(); } else { pack_(); } Index 
data_vol; if (internal::array_row_major_order) { data_vol = offset_[0]*dimensions_[0]; } else { data_vol = size(); } storage_ = new Storage(data_vol, IsActive); data_ = storage_->data(); internal::GradientIndex::set(data_, storage_); } // Resize with an ExpressionSize object void resize(const ExpressionSize& dim) { resize(&dim[0]); } // Resize using contiguous storage with an ExpressionSize object void resize_contiguous(const ExpressionSize& dim) { resize(&dim[0], true); } // Resize specifying order void resize_row_major(const ExpressionSize& dim) { resize(&dim[0]); pack_row_major_(); } void resize_row_major_contiguous(const ExpressionSize& dim) { resize(&dim[0], true); pack_row_major_contiguous_(); } void resize_column_major(const ExpressionSize& dim) { resize(&dim[0]); pack_column_major_(); } // Resize with integer arguments void resize(Index m0, Index m1=-1, Index m2=-1, Index m3=-1, Index m4=-1, Index m5=-1, Index m6=-1) { Index dim[7] = {m0, m1, m2, m3, m4, m5, m6}; // Check invalid dimensions for (int i = 0; i < Rank; ++i) { if (dim[i] < 0) { throw invalid_dimension("Invalid dimensions in array resize" ADEPT_EXCEPTION_LOCATION); } } resize(dim); } void resize_row_major(Index m0, Index m1=-1, Index m2=-1, Index m3=-1, Index m4=-1, Index m5=-1, Index m6=-1) { Index dim[7] = {m0, m1, m2, m3, m4, m5, m6}; // Check invalid dimensions for (int i = 0; i < Rank; ++i) { if (dim[i] < 0) { throw invalid_dimension("Invalid dimensions in array resize" ADEPT_EXCEPTION_LOCATION); } } resize_row_major(dim); } void resize_column_major(Index m0, Index m1=-1, Index m2=-1, Index m3=-1, Index m4=-1, Index m5=-1, Index m6=-1) { Index dim[7] = {m0, m1, m2, m3, m4, m5, m6}; // Check invalid dimensions for (int i = 0; i < Rank; ++i) { if (dim[i] < 0) { throw invalid_dimension("Invalid dimensions in array resize" ADEPT_EXCEPTION_LOCATION); } } resize_column_major(dim); } // Resize with contiguous storage and integer arguments void resize_contiguous(Index m0, Index m1=-1, Index m2=-1, 
Index m3=-1, Index m4=-1, Index m5=-1, Index m6=-1) { Index dim[7] = {m0, m1, m2, m3, m4, m5, m6}; // Check invalid dimensions for (int i = 0; i < Rank; ++i) { if (dim[i] < 0) { throw invalid_dimension("Invalid dimensions in array resize" ADEPT_EXCEPTION_LOCATION); } } resize(dim, true); } protected: // Initialize with "MyRank" explicit dimensions, the function // only being defined if MyRank is equal to the actual Rank of // the Array template typename internal::enable_if::type resize_(Index m0, Index m1=-1, Index m2=-1, Index m3=-1, Index m4=-1, Index m5=-1, Index m6=-1) { Index dim[7] = {m0, m1, m2, m3, m4, m5, m6}; resize(dim); } // Vectorization of arrays of rank>1 is possible provided that the // fastest varying dimension has padding, if necessary, to ensure // alignment template typename internal::enable_if1)&&!Packet::is_vectorized), bool>::type columns_aligned_() const { return true; } template typename internal::enable_if<(ARank>1)&&Packet::is_vectorized,bool>::type columns_aligned_() const { return offset_[Rank-2] % Packet::size == 0; } public: bool is_aliased_(const Type* mem1, const Type* mem2) const { Type const * ptr_begin; Type const * ptr_end; data_range(ptr_begin, ptr_end); if (ptr_begin <= mem2 && ptr_end >= mem1) { return true; } else { return false; } } bool all_arrays_contiguous_() const { return offset_[Rank-1] == 1 && columns_aligned_(); } // Is the first data element aligned to a packet boundary? bool is_aligned_() const { return !(reinterpret_cast(data_) & Packet::align_mask); // If we could union data with a uintptr_t object then we could // do the following, but there is no guarantee that uintptr_t // exists :-( // return !(data_unsigned_int_ & Packet::align_mask); } // Return the number of unaligned elements before reaching the // first element on an alignment boundary, which is in units of // "n" Types. 
The first "%" argument finds how many elements the // first element is above an alignment boundary; the following bit // then works out how many elements to the next alignment // boundary. template int alignment_offset_() const { // This is rather slow! return (n - (reinterpret_cast(reinterpret_cast(data_))/sizeof(Type)) % n) % n; } Type value_with_len_(const Index& j, const Index& len) const { ADEPT_STATIC_ASSERT(Rank == 1, CANNOT_USE_VALUE_WITH_LEN_ON_ARRAY_OF_RANK_OTHER_THAN_1); return data_[j*offset_[0]]; } std::string expression_string_() const { if (true) { std::string a = internal::array_helper().name(); a += dimensions_.str(); return a; } else { std::stringstream s; print(s); return s.str(); } } // The same as operator=(inactive scalar) but does not put // anything on the stack template typename internal::enable_if::value, Array&>::type set_value(RType x) { if (!empty()) { assign_inactive_scalar_(x); } return *this; } // Is the array contiguous in memory? bool is_contiguous() const { Index offset_expected = 1; for (int i = Rank-1; i >= 0; ++i) { if (offset_[i] != offset_expected) { return false; } offset_expected *= dimensions_[i]; } return true; } // Determine whether rows or columns are contiguous in memory and // increasing, needed for calling the BLAS matrix multipliciation // functions; the first can be used to check if the fastest // varying dimension is contiguous, to see if array indexes can be // incremented simply. 
// Return true if the last dimension has unit offset and, for
// arrays of rank greater than one, the second-to-last offset is at
// least the length of the last dimension (so rows do not overlap,
// although they may be padded)
bool is_row_contiguous() const {
  // ADEPT_STATIC_ASSERT(Rank == 2, CANNOT_CHECK_ROW_CONTIGUOUS_IF_NOT_MATRIX);
  // return offset_[1] == 1;
  if (Rank > 1) {
    return offset_[Rank-1] == 1 && offset_[Rank-2] >= dimensions_[Rank-1];
  }
  else {
    return offset_[Rank-1] == 1;
  }
}

// For matrices only: return true if the first dimension has unit
// offset
bool is_column_contiguous() const {
  ADEPT_STATIC_ASSERT(Rank == 2, CANNOT_CHECK_COLUMN_CONTIGUOUS_IF_NOT_MATRIX);
  return offset_[0] == 1;
}

public:
// Return the gradient index for the first element in the array,
// or -1 if not active
Index gradient_index() const {
  // ADEPT_STATIC_ASSERT(IsActive, CANNOT_ACCESS_GRADIENT_INDEX_OF_INACTIVE_ARRAY);
  // return my_gradient_index();
  return internal::GradientIndex::get();
}

// Previous implementation of print, kept for reference
/*
std::ostream& print(std::ostream& os) const {
  if (empty()) {
    os << "(empty " << Rank << "-D array)";
  }
  else if (adept::internal::array_print_curly_brackets) {
    adept::ExpressionSize i(0);
    int my_rank = -1;
    if (Rank > 1) {
      os << "\n";
    }
    do {
      for (int r = 0; r < my_rank+1; r++) { os << " "; }
      for (int r = my_rank+1; r < Rank; r++) { os << "{"; }
      for (i[Rank-1] = 0; i[Rank-1] < dimensions_[Rank-1]-1; ++i[Rank-1]) {
        os << data_[index_(i)] << ", ";
      }
      os << data_[index_(i)];
      my_rank = Rank-1;
      while (--my_rank >= 0) {
        if (++i[my_rank] >= dimensions_[my_rank]) {
          i[my_rank] = 0;
          os << "}";
        }
        else {
          os << "},\n";
          break;
        }
      }
    } while (my_rank >= 0);
    if (Rank > 1) {
      os << "}"; // "}/n"
    }
    else {
      os << "}";
    }
  }
  else {
    adept::ExpressionSize i(0);
    int my_rank;
    do {
      for (i[Rank-1] = 0; i[Rank-1] < dimensions_[Rank-1]; ++i[Rank-1]) {
        os << " " << data_[index_(i)];
      }
      my_rank = Rank-1;
      while (--my_rank >= 0) {
        if (++i[my_rank] >= dimensions_[my_rank]) {
          i[my_rank] = 0;
        }
        else {
          break;
        }
      }
      os << "\n";
    } while (my_rank >= 0);
  }
  return os;
}
*/

// Write the array to the stream os and return os. The brackets,
// separators and indentation all come from the formatting
// settings in namespace internal. Empty arrays, vectors and
// multi-dimensional arrays are each handled separately.
std::ostream& print(std::ostream& os) const {
  using namespace internal;
  if (empty()) {
    os << array_print_empty_before;
    if (array_print_empty_rank) {
      os << Rank;
    }
    os << array_print_empty_after;
  }
  else if (Rank == 1) {
    // Print a vector
    os << vector_print_before << data_[0];
    for (int i = 1; i < dimensions_[0]; ++i) {
      os << vector_separator << data_[i*offset_[0]];
    }
    os << vector_print_after;
  }
  else {
    // Print a multi-dimensional array; i holds the current index
    // in every dimension and my_rank is the slowest dimension
    // whose index changed on the previous pass (-1 on the first)
    adept::ExpressionSize i(0);
    int my_rank = -1;
    os << array_print_before;
    do {
      if (array_print_indent) {
        if (my_rank >= 0) {
          os << " ";
          for (int r = 0; r < my_rank*static_cast(array_opening_bracket.size()); r++) {
            os << " ";
          }
        }
      }
      // Open one bracket for each dimension being re-entered
      if (my_rank == -1) {
        for (int r = 1; r < Rank; r++) {
          os << array_opening_bracket;
        }
      }
      else {
        for (int r = my_rank+1; r < Rank; r++) {
          os << array_opening_bracket;
        }
      }
      // Print one run along the last dimension, with no separator
      // after its final element
      for (i[Rank-1] = 0; i[Rank-1] < dimensions_[Rank-1]-1; ++i[Rank-1]) {
        os << data_[index_(i)] << array_contiguous_separator;
      }
      os << data_[index_(i)];
      // Advance the slower indices, closing a bracket for each
      // dimension that has been completed
      my_rank = Rank-1;
      while (--my_rank >= 0) {
        if (++i[my_rank] >= dimensions_[my_rank]) {
          i[my_rank] = 0;
          os << array_closing_bracket;
        }
        else {
          os << array_closing_bracket << array_non_contiguous_separator;
          break;
        }
      }
    } while (my_rank >= 0);
    os << array_print_after;
  }
  return os;
}

// Get pointers to the first and last data members in memory.
// Dimensions with a negative offset move the start backwards,
// the others move the end forwards.
void data_range(Type const * &data_begin, Type const * &data_end) const {
  data_begin = data_;
  data_end = data_;
  for (int i = 0; i < Rank; i++) {
    if (offset_[i] >= 0) {
      data_end += (dimensions_[i]-1)*offset_[i];
    }
    else {
      data_begin += (dimensions_[i]-1)*offset_[i];
    }
  }
}

// The Stack::independent(x) and Stack::dependent(y) functions add
// the gradient_index of objects x and y to std::vector
// objects in Stack. Since x and y may be scalars or arrays, this
// is best done by delegating to the Active or Array classes.
template void push_gradient_indices(std::vector& vec) const { ADEPT_STATIC_ASSERT(IsActive, CANNOT_PUSH_GRADIENT_INDICES_FOR_INACTIVE_ARRAY); ExpressionSize i(0); Index gradient_ind = gradient_index(); Index index = 0; int my_rank; vec.reserve(vec.size() + size()); do { // Innermost loop - note that the counter is index, not max_index for (Index max_index = index + dimensions_[Rank-1]*offset_[Rank-1]; index < max_index; index += offset_[Rank-1]) { vec.push_back(gradient_ind + index); } // Increment counters appropriately depending on which // dimensions have been finished advance_index(index, my_rank, i); } while (my_rank >= 0); } // Return inactive array linked to original data Array inactive_link() { return Array(data_, storage_, dimensions_, offset_); } // Perform an in-place transpose for 2D arrays only Array& in_place_transpose() { ADEPT_STATIC_ASSERT(Rank == 2, IN_PLACE_TRANSPOSE_ONLY_POSSIBLE_WITH_2D_ARRAYS); Index tmp; // Swap dimensions tmp = dimensions_[0]; dimensions_[0] = dimensions_[1]; dimensions_[1] = tmp; // Swap offsets tmp = offset_[0]; offset_[0] = offset_[1]; offset_[1] = tmp; return *this; } // Transpose helper functions protected: template typename internal::enable_if >::type my_T() { // Transpose 2D array: create output array initially as link // to input array Array<2,Type,IsActive> out(*this); // Swap dimensions return out.in_place_transpose(); } template typename internal::enable_if >::type my_T() const { // Transpose 2D array: create output array initially as link // to input array Array<2,Type,IsActive> out(const_cast(*this)); // Swap dimensions return out.in_place_transpose(); } public: // Out-of-place transpose Array<2,Type,IsActive> T() { ADEPT_STATIC_ASSERT(Rank == 1 || Rank == 2, TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS); return my_T(); } const Array<2,Type,IsActive> T() const { ADEPT_STATIC_ASSERT(Rank == 1 || Rank == 2, TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS); return my_T(); } // "permute" is a generalized transpose, 
returning an Array linked // to the current one but with the dimensions rearranged according // to idim: idim[0] is the 0-based number of the dimension of the // current array that will be dimension 0 of the new array, // idim[1] is the number of the dimension of the current array // that will be dimension 1 of the new array and so on. Array permute(const Index* idim) { if (empty()) { throw empty_array("Attempt to permute an empty array" ADEPT_EXCEPTION_LOCATION); } ExpressionSize new_dims(0); ExpressionSize new_offset; for (int i = 0; i < Rank; ++i) { if (idim[i] >= 0 && idim[i] < Rank) { new_dims[i] = dimensions_[idim[i]]; new_offset[i] = offset_[idim[i]]; } else { throw invalid_dimension("Dimensions must be in range 0 to Rank-1 in permute" ADEPT_EXCEPTION_LOCATION); } } for (int i = 0; i < Rank; ++i) { if (new_dims[i] == 0) { throw invalid_dimension("Missing dimension in permute" ADEPT_EXCEPTION_LOCATION); } } return Array(data_, storage_, new_dims, new_offset); } Array permute(const ExpressionSize& idim) { return permute(&idim[0]); } // Up to 7 dimensions we can specify the dimensions as separate // arguments typename internal::enable_if<(Rank < 7), Array>::type permute(Index i0, Index i1, Index i2 = -1, Index i3 = -1, Index i4 = -1, Index i5 = -1, Index i6 = -1) { Index idim[7] = {i0, i1, i2, i3, i4, i5, i6}; for (int i = 0; i < Rank; ++i) { if (idim[i] == -1) { throw invalid_dimension("Incorrect number of dimensions provided to permute" ADEPT_EXCEPTION_LOCATION); } } return permute(idim); } // Only applicable to vectors, return a multi-dimensional array // that links to the data in the vector template Array reshape(const ExpressionSize& dims) { ADEPT_STATIC_ASSERT(Rank == 1, CANNOT_RESHAPE_MULTIDIMENSIONAL_ARRAY); Index new_size = 1; for (int i = 0; i < NewRank; ++i) { new_size *= dims[i]; } if (new_size != dimensions_[0]) { throw invalid_dimension("Size of reshaped array does not match original vector"); } ExpressionSize offset; offset[NewRank-1] = 
offset_[0]; for (int i = NewRank-2; i >= 0; --i) { offset[i] = dims[i+1]*offset[i+1]; } return Array(data_,storage_,dims,offset); } // More convenient interfaces to reshape providing a list of // integer dimensions Array<2,Type,IsActive> reshape(Index i0, Index i1) { return reshape(ExpressionSize<2>(i0,i1)); } Array<3,Type,IsActive> reshape(Index i0, Index i1, Index i2) { return reshape(ExpressionSize<2>(i0,i1,i2)); } Array<4,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3) { return reshape(ExpressionSize<2>(i0,i1,i2,i3)); } Array<5,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3, Index i4) { return reshape(ExpressionSize<2>(i0,i1,i2,i3,i4)); } Array<6,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3, Index i4, Index i5) { return reshape(ExpressionSize<2>(i0,i1,i2,i3,i4,i5)); } Array<7,Type,IsActive> reshape(Index i0, Index i1, Index i2, Index i3, Index i4, Index i5, Index i6) { return reshape(ExpressionSize<2>(i0,i1,i2,i3,i4,i5,i6)); } // Return an Array that is a "soft" link to the data in the // present array; that is, it does not copy the Storage object and // increase the reference counter therein. This is useful in a // multi-threaded environment when multiple threads may wish to // subset the same array. 
Array soft_link() { return Array(data_,0,dimensions_,offset_,gradient_index()); } const Array soft_link() const { return Array(data_,0,dimensions_,offset_,gradient_index()); } // Place gradients associated with the present active array into // the equivalent passive array provided as an argument template void get_gradient(Array& gradient) const { ADEPT_STATIC_ASSERT(IsActive,CANNOT_USE_GET_GRADIENT_ON_INACTIVE_ARRAY); if (gradient.empty()) { gradient.resize(dimensions_); } else if (gradient.dimensions() != dimensions_) { throw size_mismatch("Attempt to get_gradient with array of different dimensions" ADEPT_EXCEPTION_LOCATION); } static const int last = Rank-1; ExpressionSize target_offset = gradient.offset(); ExpressionSize i(0); Index index = 0; int my_rank; Index index_target = 0; Index last_dim_stretch = dimensions_[last]*offset_[last]; MyType* target = gradient.data(); do { i[last] = 0; index_target = 0; for (int r = 0; r < Rank-1; r++) { index_target += i[r]*target_offset[r]; } ADEPT_ACTIVE_STACK->get_gradients(gradient_index()+index, gradient_index()+index+last_dim_stretch, target+index_target, offset_[last], target_offset[last]); index += last_dim_stretch; advance_index(index, my_rank, i); } while (my_rank >= 0); } // Return an inactive array of the same type and rank as the // present active array containing the gradients associated with // it Array get_gradient() const { Array gradient; get_gradient(gradient); return gradient; } // Set gradients associated with the present active array to // the equivalent passive array provided as an argument template void set_gradient(const Array& gradient) const { ADEPT_STATIC_ASSERT(IsActive,CANNOT_USE_SET_GRADIENT_ON_INACTIVE_ARRAY); if (gradient.dimensions() != dimensions_) { throw size_mismatch("Attempt to set_gradient to an array of different dimensions" ADEPT_EXCEPTION_LOCATION); } static const int last = Rank-1; ExpressionSize src_offset = gradient.offset(); ExpressionSize i(0); Index index = 0; int my_rank; 
Index index_src = 0; Index last_dim_stretch = dimensions_[last]*offset_[last]; const MyType* src = gradient.data(); do { i[last] = 0; index_src = 0; for (int r = 0; r < Rank-1; r++) { index_src += i[r]*src_offset[r]; } ADEPT_ACTIVE_STACK->set_gradients(gradient_index()+index, gradient_index()+index+last_dim_stretch, src+index_src, src_offset[last], offset_[last]); index += last_dim_stretch; advance_index(index, my_rank, i); } while (my_rank >= 0); } // std::vector::type> // std_vector() const { // ADEPT_STATIC_ASSERT(Rank == 1, STD_VECTOR_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS); // std::vector::type> data(dimensions_[0]); // for (Index i = 0; i < dimensions_[0]; ++i) { // data[i] = (*this)(i); // } // return data; // } void put(std::vector::type>& data) const { ADEPT_STATIC_ASSERT(Rank == 1, PUT_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS); if (data.size() != dimensions_[0]) { data.resize(dimensions_[0]); } for (Index i = 0; i < dimensions_[0]; ++i) { data[i] = (*this)(i); } } void get(const std::vector::type>& data) { ADEPT_STATIC_ASSERT(Rank == 1, GET_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS); if (data.size() != dimensions_[0]) { resize(data.size()); } for (Index i = 0; i < dimensions_[0]; ++i) { (*this)(i) = data[i]; } } // ------------------------------------------------------------------- // Array: 6. 
Member functions accessed by the Expression class // ------------------------------------------------------------------- template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { index[MyArrayNum] = index_(i); } template Type value_at_location_(const ExpressionSize& loc) const { return data_[loc[MyArrayNum]]; } template Packet packet_at_location_(const ExpressionSize& loc) const { return Packet(data_+loc[MyArrayNum]); } Type& lvalue_at_location(const Index& loc) { return data_[loc]; } // Return a scalar template typename internal::enable_if::value, Type>::type values_at_location_(const ExpressionSize& loc) const { return data_[loc[MyArrayNum]]; } // Return a Paket from an aligned memory address template typename internal::enable_if,PacketType>::value, PacketType>::type values_at_location_(const ExpressionSize& loc) const { return Packet(data_+loc[MyArrayNum]); } // Return a Paket from an unaligned memory address template typename internal::enable_if,PacketType>::value, PacketType>::type values_at_location_(const ExpressionSize& loc) const { // integer dummy second argument indicates unaligned load return Packet(data_+loc[MyArrayNum], 0); } // Return a scalar template typename internal::enable_if::value, Type>::type values_at_location_store_(const ExpressionSize& loc, internal::ScratchVector& scratch) const { return data_[loc[MyArrayNum]]; } // Return a Paket from an aligned memory address template typename internal::enable_if,PacketType>::value, PacketType>::type values_at_location_store_(const ExpressionSize& loc, internal::ScratchVector& scratch) const { return Packet(data_+loc[MyArrayNum]); } // Return a Paket from an unaligned memory address template typename internal::enable_if,PacketType>::value, PacketType>::type values_at_location_store_(const ExpressionSize& loc, internal::ScratchVector& scratch) const { return Packet(data_+loc[MyArrayNum], 0); } template Type value_at_location_store_(const ExpressionSize& loc, 
internal::ScratchVector& scratch) const { return data_[loc[MyArrayNum]]; } template Type value_stored_(const ExpressionSize& loc, const internal::ScratchVector& scratch) const { return data_[loc[MyArrayNum]]; } template void advance_location_(ExpressionSize& loc) const { loc[MyArrayNum] += offset_[Rank-1]; } // If an expression leads to calc_gradient being called on an // active object, we push the multiplier and the gradient index on // to the operation stack (or 1.0 if no multiplier is specified template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch) const { stack.push_rhs(1.0, gradient_index() + loc[MyArrayNum]); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch, const MyType& multiplier) const { stack.push_rhs(multiplier, gradient_index() + loc[MyArrayNum]); } template void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector >& scratch, internal::ScratchVector >& gradients) const { stack.push_rhs_indices::size,NActive>(gradient_index() + loc[MyArrayNum]); gradients[MyActiveNum] = Packet(static_cast(1.0)); } template void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector >& scratch, internal::ScratchVector >& gradients, const MyType& multiplier) const { stack.push_rhs_indices::size,NActive>(gradient_index() + loc[MyArrayNum]); gradients[MyActiveNum] = multiplier; } // ------------------------------------------------------------------- // Array: 7. Protected member functions // ------------------------------------------------------------------- protected: // Set the memory offsets from the array dimensions either // assuming C++-style row-major order, or Fortran-style // column-major order. The pack_() function spaces the data so // that all arrays are aligned to packet boundaries, to facilitate // vectorization. 
void pack_row_major_() { offset_[Rank-1] = 1; if (Rank > 1) { // Round up to nearest packet size so that all rows are aligned if (dimensions_[Rank-1] >= Packet::size*2) { offset_[Rank-2] = ((dimensions_[Rank-1] + Packet::size - 1) / Packet::size) * Packet::size; } else { offset_[Rank-2] = dimensions_[Rank-1]; } for (int i = Rank-3; i >= 0; --i) { offset_[i] = dimensions_[i+1]*offset_[i+1]; } } } void pack_column_major_() { offset_[0] = 1; for (int i = 1; i < Rank; ++i) { offset_[i] = dimensions_[i-1]*offset_[i-1]; } } void pack_() { if (internal::array_row_major_order) { pack_row_major_(); } else { pack_column_major_(); } } // ...while the pack_contiguous_() function makes sure all data // are contiguous in memory void pack_row_major_contiguous_() { offset_[Rank-1] = 1; for (int i = Rank-2; i >= 0; --i) { offset_[i] = dimensions_[i+1]*offset_[i+1]; } } void pack_contiguous_() { if (internal::array_row_major_order) { pack_row_major_contiguous_(); } else { pack_column_major_(); } } // Return the memory index (relative to data_) for array element // indicated by j Index index_(Index j[Rank]) const { Index o = 0; for (int i = 0; i < Rank; i++) { o += j[i]*offset_[i]; } return o; } Index index_(const ExpressionSize& j) const { Index o = 0; for (int i = 0; i < Rank; i++) { o += j[i]*offset_[i]; } return o; } // Used in traversing through an array void advance_index(Index& index, int& rank, ExpressionSize& i) const { index -= offset_[Rank-1]*dimensions_[Rank-1]; rank = Rank-1; while (--rank >= 0) { if (++i[rank] >= dimensions_[rank]) { i[rank] = 0; index -= offset_[rank]*(dimensions_[rank]-1); } else { index += offset_[rank]; break; } } } // When assigning a scalar to a whole array, there may be // advantage in specialist behaviour depending on the rank of the // array. This is a generic one that copies the number but treats // the present array as passive. 
template typename internal::enable_if::type assign_inactive_scalar_(X x) { ExpressionSize i(0); Index index = 0; int my_rank; do { // Innermost loop - note that the counter is index, not max_index for (Index max_index = index + dimensions_[LocalRank-1]*offset_[LocalRank-1]; index < max_index; index += offset_[LocalRank-1]) { data_[index] = x; } // Increment counters appropriately depending on which // dimensions have been finished advance_index(index, my_rank, i); } while (my_rank >= 0); } // An active array being assigned the value of an inactive scalar template typename internal::enable_if::type assign_inactive_scalar_(X x) { // If not recording we call the inactive version instead #ifdef ADEPT_RECORDING_PAUSABLE if (! ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar_(x); return; } #endif ExpressionSize i(0); Index gradient_ind = gradient_index(); Index index = 0; int my_rank; do { // Innermost loop ADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimensions_[LocalRank-1], offset_[LocalRank-1]); for (Index max_index = index + dimensions_[LocalRank-1]*offset_[LocalRank-1]; index < max_index; index += offset_[LocalRank-1]) { data_[index] = x; } // Increment counters appropriately depending on which // dimensions have been finished advance_index(index, my_rank, i); } while (my_rank >= 0); } // When copying an expression to a whole array, there may be // advantage in specialist behaviour depending on the rank of the // array template inline typename internal::enable_if::is_vectorizable || !internal::is_same::value),void>::type assign_expression_(const E& rhs) { ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY); ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; static const int last = LocalRank-1; // FIX!!! 
if (false) { //rhs.all_arrays_contiguous()) { do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { // Note that this is faster as we know that all indices // need to be incremented by 1 data_[index] = rhs.next_value_contiguous(ind); } advance_index(index, my_rank, i); } while (my_rank >= 0); } else { do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { data_[index] = rhs.next_value(ind); } advance_index(index, my_rank, i); } while (my_rank >= 0); } } // Vectorized version for Rank-1 arrays template inline //__attribute__((always_inline)) typename internal::enable_if::is_vectorizable && LocalRank == 1 && internal::is_same::value,void>::type // Removing the reference speeds things up because otherwise E // is dereferenced each loop // assign_expression_(const E& __restrict rhs) { assign_expression_(const E rhs) { ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY); ExpressionSize<1> i(0); ExpressionSize::n_arrays> ind(0); if (dimensions_[0] >= Packet::size*2 && offset_[0] == 1 && rhs.all_arrays_contiguous() ) { // Contiguous source and destination data Index istartvec = 0; Index iendvec = 0; istartvec = rhs.alignment_offset(); if (istartvec < 0 || istartvec != alignment_offset_::size>()) { istartvec = iendvec = 0; } else { // Adjust iendvec such that iendvec-istartvec is a multiple // of the packet size iendvec = (dimensions_[0]-istartvec); iendvec -= (iendvec % Packet::size); iendvec += istartvec; } i[0] = 0; rhs.set_location(i, ind); Type* const __restrict t = data_; // Avoids an unnecessary load for some reason // Innermost loop for (int index = 0; index < istartvec; ++index) { // Scalar version t[index] = rhs.next_value_contiguous(ind); } for (int index = istartvec ; index < iendvec; index += Packet::size) { // Vectorized version // rhs.next_packet(ind).put(data_+index) 
// FIX may need unaligned store rhs.next_packet(ind).put(t+index); } for (int index = iendvec ; index < dimensions_[0]; ++index) { // Scalar version t[index] = rhs.next_value_contiguous(ind); } } else { // Non-contiguous source or destination data i[0] = 0; rhs.set_location(i, ind); Type* const __restrict t = data_; // Avoids an unnecessary load for some reason for (int index = 0; i[0] < dimensions_[0]; ++i[0], index += offset_[0]) { t[index] = rhs.next_value(ind); } } } // Vectorized version template inline typename internal::enable_if::is_vectorizable && (LocalRank > 1) && internal::is_same::value,void>::type // Removing the reference speeds things up because otherwise E // is dereferenced each loop // assign_expression_(const E& rhs) assign_expression_(const E rhs) { ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY); ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; static const int last = LocalRank-1; if (dimensions_[last] >= Packet::size*2 && all_arrays_contiguous_() && rhs.all_arrays_contiguous()) { // Contiguous source and destination data int iendvec; int istartvec = rhs.alignment_offset(); if (istartvec < 0 || istartvec != alignment_offset_::size>()) { istartvec = iendvec = 0; } else { iendvec = (dimensions_[last]-istartvec); iendvec -= (iendvec % Packet::size); iendvec += istartvec; } do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < istartvec; ++i[last], ++index) { // Scalar version data_[index] = rhs.next_value_contiguous(ind); } Type* const __restrict t = data_; // Avoids an unnecessary load for some reason for ( ; i[last] < iendvec; i[last] += Packet::size, index += Packet::size) { // Vectorized version // rhs.next_packet(ind).put(data_+index); // FIX may need unaligned store rhs.next_packet(ind).put(t+index); } for ( ; i[last] < dimensions_[last]; ++i[last], ++index) { // Scalar version data_[index] = rhs.next_value_contiguous(ind); } 
advance_index(index, my_rank, i); } while (my_rank >= 0); } else { // Non-contiguous source or destination data do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { data_[index] = rhs.next_value(ind); } advance_index(index, my_rank, i); } while (my_rank >= 0); } } template inline typename internal::enable_if::type // assign_expression_(const E& rhs) { assign_expression_(const E rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; static const int last = LocalRank-1; ADEPT_ACTIVE_STACK->check_space(internal::expr_cast::n_active * size()); if (internal::expr_cast::is_vectorizable && rhs.all_arrays_contiguous()) { // Contiguous source and destination data Type* const __restrict t = data_; // Avoids an unnecessary load for some reason do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { t[index] = rhs.next_value_and_gradient_contiguous(*ADEPT_ACTIVE_STACK, ind); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active? } advance_index(index, my_rank, i); } while (my_rank >= 0); } else { // Non-contiguous source or destination data Type* const __restrict t = data_; // Avoids an unnecessary load for some reason do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { t[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, ind); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active? 
} advance_index(index, my_rank, i); } while (my_rank >= 0); } } template inline typename internal::enable_if::type assign_expression_(const E& rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; Index gradient_ind = gradient_index(); static const int last = LocalRank-1; do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop ADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimensions_[LocalRank-1], offset_[LocalRank-1]); for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { data_[index] = rhs.next_value(ind); } advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) { ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); Index index = 0; int my_rank; static const int last = Rank-1; do { i[last] = 0; bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { if (bool_expr.next_value(bool_ind)) { data_[index] = rhs; } } advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) { #ifdef ADEPT_RECORDING_PAUSABLE if (! 
ADEPT_ACTIVE_STACK->is_recording()) { assign_conditional_inactive_scalar_(bool_expr, rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); Index index = 0; int my_rank; static const int last = Rank-1; do { i[last] = 0; bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { if (bool_expr.next_value(bool_ind)) { data_[index] = rhs; ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); } } advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_conditional_(const B& bool_expr, const C& rhs) { ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); ExpressionSize::n_arrays> rhs_ind(0); Index index = 0; int my_rank; static const int last = Rank-1; bool is_gap = false; do { i[last] = 0; rhs.set_location(i, rhs_ind); bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { if (bool_expr.next_value(bool_ind)) { if (is_gap) { rhs.set_location(i, rhs_ind); is_gap = false; } data_[index] = rhs.next_value(rhs_ind); } else { is_gap = true; } } advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_conditional_(const B& bool_expr, const C& rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_conditional_(bool_expr, rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); ExpressionSize::n_arrays> rhs_ind(0); Index index = 0; int my_rank; static const int last = Rank-1; bool is_gap = false; ADEPT_ACTIVE_STACK->check_space(internal::expr_cast::n_active * size()); do { i[last] = 0; rhs.set_location(i, rhs_ind); bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimensions_[last]; ++i[last], index += offset_[last]) { if (bool_expr.next_value(bool_ind)) { 
if (is_gap) { rhs.set_location(i, rhs_ind); is_gap = false; } data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, rhs_ind); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active? } else { is_gap = true; } } advance_index(index, my_rank, i); } while (my_rank >= 0); } // ------------------------------------------------------------------- // Array: 8. Static variables // ------------------------------------------------------------------- public: void print_style(ArrayPrintStyle ps); // ------------------------------------------------------------------- // Array: 9. Data // ------------------------------------------------------------------- protected: Type* __restrict data_; // Pointer to values Storage* storage_; // Pointer to Storage object ExpressionSize dimensions_; // Size of each dimension ExpressionSize offset_; // Memory offset for each dimension }; // End of Array class // ------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------- // Set the default ordering of arrays: if "true" use C-style // row-major ordering, otherwise use Fortran-style column-major // ordering inline void set_array_row_major_order(bool o = true) { internal::array_row_major_order = o; } // Set the print style void set_array_print_style(ArrayPrintStyle ps); inline ArrayPrintStyle get_array_print_style() { return internal::array_print_style; } // Change whether or not curly brackets are printed when arrays are // sent to a stream with the << operator inline void set_array_print_curly_brackets(bool o = true) { if (o) { set_array_print_style(PRINT_STYLE_CURLY); } else { set_array_print_style(PRINT_STYLE_PLAIN); } } // Print array on a stream template inline std::ostream& operator<<(std::ostream& os, const Array& A) { return A.print(os); } // Extract inactive part of array, working correctly depending on // whether argument is active or inactive template 
inline Array& value(Array& expr) { return expr; } template inline Array value(Array& expr) { return expr.inactive_link(); } // Print an array expression on a stream template inline typename internal::enable_if<(E::rank > 0), std::ostream&>::type operator<<(std::ostream& os, const Expression& expr) { Array A; A.assign_inactive(expr); return A.print(os); } // ------------------------------------------------------------------- // Transpose function // ------------------------------------------------------------------- // Transpose 2D array template inline Array<2,Type,IsActive> transpose(Array<2,Type,IsActive>& in) { // Create output array initially as link to input array Array<2,Type,IsActive> out(in); // Swap dimensions return out.in_place_transpose(); } // Transpose 1D array, treating it as a length N column vector, so // returning a 1xN 2D array template inline Array<2,Type,IsActive> transpose(Array<1,Type,IsActive>& in) { return Array<2,Type,IsActive>(in.data(), in.storage(), ExpressionSize<2>(1,in.dimension(0)), ExpressionSize<2>(in.dimension(0)*in.offset(0),in.offset(0))); } // Transpose a 2D expression template inline typename internal::enable_if >::type transpose(const Expression& in) { // Create output array by evaluating input expression Array<2,Type,E::is_active> out(in); // Swap dimensions return out.in_place_transpose(); } // Transpose a 1D expression template inline typename internal::enable_if >::type transpose(const Expression& in) { Array<1,Type,E::is_active> out_1D(in); return Array<2,Type,E::is_active>(out_1D.data(), out_1D.storage(), ExpressionSize<2>(1,out_1D.dimension(0)), ExpressionSize<2>(out_1D.dimension(0)*out_1D.offset(0),out_1D.offset(0))); } // Extract the gradients from an active Array after the // Stack::forward or Stack::reverse functions have been called template inline void get_gradients(const Array& a, Array& data) { data = a.get_gradient(); } } // End namespace adept #endif ================================================ FILE: 
include/adept/ArrayWrapper.h ================================================ /* ArrayWrapper.h -- Make Arrays work faster in expressions Copyright (C) 2016-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptArrayWrapper_H #define AdeptArrayWrapper_H 1 //#include namespace adept { // Forward declaration of Array class template class Array; namespace internal { template struct ArrayWrapper : public Expression > { typedef Array MyArray; // Static definitions to enable the properties of this type of // expression to be discerned at compile time static const bool is_active = IsActive; static const bool is_lvalue = true; static const int rank = Rank; static const int n_active = IsActive * (1 + is_complex::value); static const int n_scratch = 0; static const int n_arrays = 1; static const bool is_vectorizable = MyArray::is_vectorizable; ArrayWrapper(const MyArray& a) : data(a.const_data()), array(a) { } bool get_dimensions_(ExpressionSize& dim) const { return array.get_dimensions_(dim); } std::string expression_string_() const { return std::string("wrapped") + array.expression_string_(); } bool is_aliased_(const Type* mem1, const Type* mem2) const { return array.is_aliased(mem1, mem2); } bool all_arrays_contiguous_() const { return array.all_arrays_contiguous_(); } bool is_aligned_() const { return array.is_aligned_(); } template int alignment_offset_() const { return array.template alignment_offset_(); } Type value_with_len_(const Index& j, const Index& len) const { return array.value_with_len_(j,len); } // Optimize by storing the offset of the fastest-varying dimension? 
template void advance_location_(ExpressionSize& loc) const { array.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return data[loc[MyArrayNum]]; } template Packet packet_at_location_(const ExpressionSize& loc) const { return Packet(data+loc[MyArrayNum]); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return data[loc[MyArrayNum]]; } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return data[loc[MyArrayNum]]; } template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { array.template set_location_(i, index); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { array.template calc_gradient_(stack, loc, scratch); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { array.template calc_gradient_(stack, loc, scratch, multiplier); } protected: // typedef Type __attribute__((aligned(32))) aligned_type; Type const * const __restrict data; //aligned_type const * const __restrict data; const MyArray& __restrict array; }; // Unary and binary operations normally contain constant // references to their arguments, but if that reference is an // Array then the compiler represents this reference as a pointer // that must be dereferenced every time a value is extracted from // the Array. To speed this up, nested_expression::type // is used to obtain the constant reference to ExprType, but for // passive Arrays an ArrayWrapper object is returned instead that // is faster. template struct nested_expression { typedef const T& __restrict type; }; template struct nested_expression > { typedef const ArrayWrapper type; }; template class Op, class R> struct UnaryOperation; template struct BinaryOperation; // Should we check that rank is > 1? 
template class Op, class R> struct nested_expression > { typedef UnaryOperation type; }; template struct nested_expression > { typedef BinaryOperation type; }; } } #endif ================================================ FILE: include/adept/BinaryOperation.h ================================================ /* BinaryOperation.h -- Binary operations on Adept expressions Copyright (C) 2014-2018 European Centre for Medium-Range Weather Forecasts Robin Hogan This file is part of the Adept library. */ #ifndef AdeptBinaryOperation_H #define AdeptBinaryOperation_H #include #include namespace adept { namespace internal { // --------------------------------------------------------------------- // SECTION 4.1: Binary operations: define BinaryOperation type // --------------------------------------------------------------------- // Binary operations derive from this class, where Op is a policy // class defining how to implement the operation and L and R are // the arguments to the operation template struct BinaryOperation : public Expression >, protected Op { // Static data static const int rank = (L::rank > R::rank ? 
L::rank : R::rank); static const bool is_active = (L::is_active || R::is_active) && !is_same::value; static const int store_result = is_active * Op::store_result; static const int n_active = expr_cast::n_active + expr_cast::n_active; // Assume the only local scratch variable is the result of the // binary expression static const int n_local_scratch = store_result; // + Op::n_scratch::value static const int n_scratch = n_local_scratch + L::n_scratch + R::n_scratch; static const int n_arrays = L::n_arrays + R::n_arrays; static const bool is_vectorizable = L::is_vectorizable && R::is_vectorizable && Op::is_vectorized && is_same::value; using Op::is_operator; using Op::operation; using Op::operation_string; // DATA //const L& left; //const R& right; const typename nested_expression::type left; const typename nested_expression::type right; BinaryOperation(const Expression& left_, const Expression& right_) : left(left_.cast()), right(right_.cast()) { } template bool get_dimensions_(ExpressionSize& dim) const { return my_get_dimensions(dim); } protected: template typename enable_if::type my_get_dimensions(ExpressionSize& dim) const { ExpressionSize right_dim; return left.get_dimensions(dim) && right.get_dimensions(right_dim) && compatible(dim, right_dim); } template typename enable_if::type my_get_dimensions(ExpressionSize& dim) const { return left.get_dimensions(dim); } template typename enable_if::type my_get_dimensions(ExpressionSize& dim) const { return right.get_dimensions(dim); } template typename enable_if::type my_get_dimensions(ExpressionSize& dim) const { return true; } public: std::string expression_string_() const { std::string str; if (is_operator) { str = "(" + left.expression_string() + operation_string() + right.expression_string() + ")"; } else { str = operation_string(); str += "(" + left.expression_string() + "," + right.expression_string() + ")"; } return str; } bool is_aliased_(const Type* mem1, const Type* mem2) const { return left.is_aliased(mem1, 
mem2) || right.is_aliased(mem1, mem2); } bool all_arrays_contiguous_() const { return left.all_arrays_contiguous_() && right.all_arrays_contiguous_(); } bool is_aligned_() const { return left.is_aligned_() && right.is_aligned_(); } template int alignment_offset_() const { int l = left.template alignment_offset_(); int r = right.template alignment_offset_(); if (l == r) { return l; } else if (l == n) { return r; } else if (r == n) { return l; } else { return -1; } } Type value_with_len_(const Index& j, const Index& len) const { return operation(left.value_with_len(j,len), right.value_with_len(j,len)); } template void advance_location_(ExpressionSize& loc) const { left.template advance_location_(loc); right.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return operation(left.template value_at_location_(loc), right.template value_at_location_(loc)); } template Packet packet_at_location_(const ExpressionSize& loc) const { return operation(left.template packet_at_location_(loc), right.template packet_at_location_(loc)); } template PacketType values_at_location_(const ExpressionSize& loc) const { return operation(left.template values_at_location_(loc), right.template values_at_location_(loc)); } template PacketType values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return my_values_at_location_store_(loc, scratch); } // Adept-1.x did not store for addition and subtraction! // Moreover, we should ideally not ask inactive arguments to // store their result. template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return my_value_at_location_store_(loc, scratch); } // Adept-1.x did not store for addition and subtraction! 
template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return my_value_stored_(loc, scratch); } protected: template typename enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = operation(left.template value_at_location_store_(loc, scratch), right.template value_at_location_store_(loc, scratch)); } // In differentiating "a/b", it helps to store "1/b"; // "operation_store" is only provided by Divide and Atan2 template typename enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = Op::operation_store(left.template value_at_location_store_(loc, scratch), right.template value_at_location_store_(loc, scratch), scratch[MyScratchNum+1]); } // Adept-1.x did not store for addition and subtraction! template typename enable_if<(StoreResult > 0), Type>::type my_value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return scratch[MyScratchNum]; } template typename enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return operation(left.template value_at_location_store_(loc, scratch), right.template value_at_location_store_(loc, scratch)); } template typename enable_if::type my_value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return operation(left.template value_at_location_(loc), right.template value_at_location_(loc)); } template typename enable_if::type my_values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = operation(left.template values_at_location_store_(loc, scratch), right.template values_at_location_store_(loc, scratch)); } template typename enable_if::type my_values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = Op::operation_store(left.template 
values_at_location_store_(loc, scratch), right.template values_at_location_store_(loc, scratch), scratch[MyScratchNum+1]); } template typename enable_if<(StoreResult>0) && UseStored, PacketType>::type my_values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum]; } template typename enable_if::type my_values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return operation(left.template values_at_location_store_(loc, scratch), right.template values_at_location_store_(loc, scratch)); } template typename enable_if::type my_values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return operation(left.template values_at_location_(loc), right.template values_at_location_(loc)); } public: template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { left.template set_location_(i, index); right.template set_location_(i, index); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { calc_left_ (stack, left, loc, scratch); calc_right_(stack, right, loc, scratch); } // As the previous but multiplying the gradient by "multiplier" template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { calc_left_ (stack, left, loc, scratch, multiplier); calc_right_(stack, right, loc, scratch, multiplier); } protected: // Only calculate gradients for left and right arguments if they // are active; otherwise do nothing template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch) const { Op::template calc_left(stack, left, right, loc, scratch); } template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch) const { } template typename enable_if::type calc_right_(Stack& stack, const RType& right, 
const ExpressionSize& loc, const ScratchVector& scratch) const { Op::template calc_right(stack, left, right, loc, scratch); } template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch) const { } template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { Op::template calc_left(stack, left, right, loc, scratch, multiplier); } template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { Op::template calc_right(stack, left, right, loc, scratch, multiplier); } template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } }; // --------------------------------------------------------------------- // SECTION 4.2: policy classes for BinaryOperation: with scalars // --------------------------------------------------------------------- // Binary operations with a non-Expression on the left-hand-side template struct BinaryOpScalarLeft : public Expression >, protected Op { // Static data static const int rank = R::rank; static const bool is_active = R::is_active && !is_same::value; static const int store_result = is_active * Op::store_result; static const int n_active = expr_cast::n_active; // Assume the only local scratch variable is the result of the // binary expression static const int n_local_scratch = store_result; // + Op::n_scratch::value static const int n_scratch = n_local_scratch + R::n_scratch; static const int n_arrays = R::n_arrays; static const bool is_vectorizable = R::is_vectorizable && Op::is_vectorized && 
is_same::value; using Op::is_operator; using Op::operation; using Op::operation_string; // DATA Packet left; const R& right; BinaryOpScalarLeft(L left_, const Expression& right_) : left(left_), right(right_.cast()) { } template bool get_dimensions_(ExpressionSize& dim) const { return right.get_dimensions(dim); } std::string expression_string_() const { std::stringstream s; if (is_operator) { s << "(" << left.value() << operation_string() << right.expression_string() << ")"; } else { s << operation_string() << "(" << left.value() << "," << static_cast(&right)->expression_string() << ")"; } return s.str(); } bool is_aliased_(const Type* mem1, const Type* mem2) const { return right.is_aliased(mem1, mem2); } bool all_arrays_contiguous_() const { return right.all_arrays_contiguous_(); } bool is_aligned_() const { return right.is_aligned_(); } template int alignment_offset_() const { return right.template alignment_offset_(); } Type value_with_len_(const Index& j, const Index& len) const { return operation(left.value(), right.value_with_len(j,len)); } template void advance_location_(ExpressionSize& loc) const { right.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return operation(left.value(), right.template value_at_location_(loc)); } template Packet packet_at_location_(const ExpressionSize& loc) const { return operation(left, right.template packet_at_location_(loc)); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return my_value_at_location_store_(loc, scratch); } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return my_value_stored_(loc, scratch); } protected: template typename enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = operation(left.value(), right.template value_at_location_store_(loc, scratch)); } template typename 
enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = Op::operation_store(left.value(), right.template value_at_location_store_(loc, scratch), scratch[MyScratchNum+1]); } template typename enable_if<(StoreResult > 0), Type>::type my_value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return scratch[MyScratchNum]; } template typename enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return operation(left.value(), right.template value_at_location_store_(loc, scratch)); } template typename enable_if::type my_value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return operation(left.value(),right.template value_at_location_(loc)); } public: template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { right.template set_location_(i, index); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { calc_right_(stack, right, loc, scratch); } // As the previous but multiplying the gradient by "multiplier" template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { calc_right_(stack, right, loc, scratch, multiplier); } protected: // Only calculate gradients arguments if they are active; // otherwise do nothing template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch) const { Op::template calc_right(stack, Scalar(left.value()), right, loc, scratch); } template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch) const { } template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { Op::template 
calc_right(stack, Scalar(left.value()), right, loc, scratch, multiplier); } template typename enable_if::type calc_right_(Stack& stack, const RType& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } }; // Binary operations with a non-Expression on the right-hand-side template struct BinaryOpScalarRight : public Expression >, protected Op { // Static data static const int rank = L::rank; static const bool is_active = L::is_active && !is_same::value; static const int store_result = is_active * Op::store_result; static const int n_active = expr_cast::n_active; // Assume the only local scratch variable is the result of the // binary expression static const int n_local_scratch = store_result; // + Op::n_scratch::value static const int n_scratch = n_local_scratch + L::n_scratch; static const int n_arrays = L::n_arrays; static const bool is_vectorizable = L::is_vectorizable && Op::is_vectorized && is_same::value; using Op::is_operator; using Op::operation; using Op::operation_string; // DATA const L& left; Packet right; BinaryOpScalarRight(const Expression& left_, R right_) : left(left_.cast()), right(right_) { // Some operations (divide and atan2) store one extra piece of // information during differentiation, so have // store_result==2. This should not be needed when the RHS is // scalar, so has not been implemented. 
ADEPT_STATIC_ASSERT((!is_active || store_result<2), ERROR_IN_BINARY_OP_SCALAR_RIGHT); } template bool get_dimensions_(ExpressionSize& dim) const { return left.get_dimensions(dim); } std::string expression_string_() const { std::stringstream s; if (is_operator) { s << "(" << left.expression_string() << operation_string() << right.value() << ")"; } else { s << operation_string() << "(" << static_cast(&left)->expression_string() << "," << right.value() << ")"; } return s.str(); } bool is_aliased_(const Type* mem1, const Type* mem2) const { return left.is_aliased(mem1, mem2); } bool all_arrays_contiguous_() const { return left.all_arrays_contiguous_(); } bool is_aligned_() const { return left.is_aligned_(); } template int alignment_offset_() const { return left.template alignment_offset_(); } Type value_with_len_(const Index& j, const Index& len) const { return operation(left.value_with_len(j,len), right.value()); } template void advance_location_(ExpressionSize& loc) const { left.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return operation(left.template value_at_location_(loc), right.value()); } template Packet packet_at_location_(const ExpressionSize& loc) const { return operation(left.template packet_at_location_(loc), right); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return my_value_at_location_store_(loc, scratch); } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return my_value_stored_(loc, scratch); } protected: template typename enable_if<(StoreResult > 0), Type>::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = operation( left.template value_at_location_store_(loc, scratch), right.value()); } template typename enable_if<(StoreResult > 0), Type>::type my_value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) 
const { return scratch[MyScratchNum]; } template typename enable_if::type my_value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return operation(left.template value_at_location_store_(loc, scratch), right.value()); } template typename enable_if::type my_value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return operation(left.template value_at_location_(loc), right.value()); } public: template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { left.template set_location_(i, index); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { calc_left_(stack, left, loc, scratch); } // As the previous but multiplying the gradient by "multiplier" template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { calc_left_(stack, left, loc, scratch, multiplier); } protected: // Only calculate gradients arguments if they are active; // otherwise do nothing template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch) const { Op::template calc_left(stack, left, Scalar(right.value()), loc, scratch); } template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch) const { } template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { Op::template calc_left(stack, left, Scalar(right.value()), loc, scratch, multiplier); } template typename enable_if::type calc_left_(Stack& stack, const LType& left, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } }; } // End namespace internal namespace internal { // --------------------------------------------------------------------- // SECTION 4.3: policy classes for 
BinaryOperation: standard operators // --------------------------------------------------------------------- // Policy class implementing operator+ struct Add { static const bool is_operator = true; // Operator or function for expression_string() static const int store_result = 0; // Do we need any scratch space? static const bool is_vectorized = true; const char* operation_string() const { return "+"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { return left + right; } // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { left.template calc_gradient_(stack, loc, scratch); } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { right.template calc_gradient_(stack, loc, scratch); } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { left.template calc_gradient_(stack, loc, scratch, multiplier); } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { right.template calc_gradient_(stack, loc, scratch, multiplier); } }; // Policy class implementing operator- struct Subtract { static const bool is_operator = true; // Operator or function for expression_string() static const int store_result = 1; // Do we need any scratch space? 
static const bool is_vectorized = true; const char* operation_string() const { return "-"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { return left - right; } // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { left.template calc_gradient_(stack, loc, scratch); } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { right.template calc_gradient_(stack, loc, scratch, -1.0); } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { left.template calc_gradient_(stack, loc, scratch, multiplier); } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { right.template calc_gradient_(stack, loc, scratch, -multiplier); } }; // Policy class implementing operator* struct Multiply { static const bool is_operator = true; // Operator or function for expression_string() static const int store_result = 1; // Do we need any scratch space? 
(this can be 0 or 1) static const bool is_vectorized = true; const char* operation_string() const { return "*"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { return left * right; } // Calculate the gradient of the left-hand argument template static void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) { left.template calc_gradient_(stack, loc, scratch, right.template value_stored_(loc, scratch)); } // Calculate the gradient of the right-hand argument template static void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) { right.template calc_gradient_(stack, loc, scratch, left.template value_stored_(loc, scratch)); } // Calculate the gradient of the left-hand argument with a multiplier template static void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) { left.template calc_gradient_(stack, loc, scratch, multiplier *right.template value_stored_(loc, scratch)); } // Calculate the gradient of the right-hand argument with a multiplier template static void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) { right.template calc_gradient_(stack, loc, scratch, multiplier*left.template value_stored_(loc, scratch)); } }; // Policy class implementing operator/ struct Divide { static const bool is_operator = true; // Operator or function for expression_string() static const int store_result = 2; // Do we need any scratch space? 
(this can be 1 or 2) static const bool is_vectorized = true; const char* operation_string() const { return "/"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { return left / right; } template typename promote::type operation_store(const LType& left, const RType& right, Real& one_over_right) const { one_over_right = 1.0 / right; return left * one_over_right; } // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { // If f(a,b) = a/b then df/da = 1/b // If store_result==1 then do this: //left.template calc_gradient_(stack, loc, scratch, // 1.0 / right.template value_stored_(loc, scratch)); // If store_result==2 then do this: left.template calc_gradient_(stack, loc, scratch, scratch[MyScratchNum+1]); } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { // If f(a,b) = a/b then df/db = -a/(b*b) = -f/b // If store_result==1 then do this: //right.template calc_gradient_(stack, loc, scratch, // -scratch[MyScratchNum] / right.template value_stored_(loc, scratch)); // If store_result==2 then do this: right.template calc_gradient_(stack, loc, scratch, -scratch[MyScratchNum] * scratch[MyScratchNum+1]); } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { // If f(a,b) = a/b then w*df/da = w/b // If store_result==1 then do this: //left.template calc_gradient_(stack, loc, scratch, multiplier // / right.template value_stored_(loc, scratch)); // If store_result==2 then do this: left.template calc_gradient_(stack, loc, scratch, 
multiplier*scratch[MyScratchNum+1]); } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { // If f(a,b) = a/b then w*df/db = -w*a/(b*b) = -w*f/b // If store_result==1 then do this: //right.template calc_gradient_(stack, loc, scratch, // -multiplier * scratch[MyScratchNum] // / right.template value_stored_(loc, scratch)); // If store_result==2 then do this: right.template calc_gradient_(stack, loc, scratch, -multiplier * scratch[MyScratchNum] * scratch[MyScratchNum+1]); } }; // Policy class implementing function pow struct Pow { static const bool is_operator = false; // Operator or function for expression_string() static const int store_result = 1; // Do we need any scratch space? (this CANNOT be changed) static const bool is_vectorized = false; const char* operation_string() const { return "pow"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { using std::pow; return pow(left, right); } // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { using std::pow; left.template calc_gradient_(stack, loc, scratch, right.template value_stored_(loc, scratch) *pow(left.template value_stored_(loc, scratch), right.template value_stored_(loc, scratch) - 1.0)); } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { using std::log; right.template calc_gradient_(stack, loc, scratch, scratch[MyScratchNum] * log(left.template value_stored_(loc, scratch))); } // Calculate the gradient of the left-hand argument with a multiplier template void 
calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { using std::pow; left.template calc_gradient_(stack, loc, scratch, multiplier *right.template value_stored_(loc, scratch) *pow(left.template value_stored_(loc, scratch), right.template value_stored_(loc, scratch) - 1.0)); } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { using std::log; right.template calc_gradient_(stack, loc, scratch, multiplier * scratch[MyScratchNum] * log(left.template value_stored_(loc, scratch))); } }; // Policy class implementing function atan2 struct Atan2 { static const bool is_operator = false; // Operator or function for expression_string() static const int store_result = 2; // Do we need any scratch space? Yes: for left^2+right^2 static const bool is_vectorized = false; const char* operation_string() const { return "atan2"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { using std::atan2; return atan2(left, right); } // Implement the basic operation template typename promote::type operation_store(const LType& left, const RType& right, Real& saved_term) const { using std::atan2; saved_term = 1.0 / (left*left + right*right); return atan2(left, right); } // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { left.template calc_gradient_(stack, loc, scratch, right.template value_stored_(loc, scratch) *scratch[MyScratchNum+1]); } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& 
scratch) const { right.template calc_gradient_(stack, loc, scratch, -left.template value_stored_(loc, scratch)*scratch[MyScratchNum+1]); } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { left.template calc_gradient_(stack, loc, scratch, right.template value_stored_(loc, scratch) *scratch[MyScratchNum+1]*multiplier); } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { right.template calc_gradient_(stack, loc, scratch, -left.template value_stored_(loc, scratch)*scratch[MyScratchNum+1]*multiplier); } }; // Policy class implementing function max struct Max { static const bool is_operator = false; // Operator or function for expression_string() static const int store_result = 0; // Do we need any scratch space? (this can be 0 or 1) static const bool is_vectorized = true; const char* operation_string() const { return "max"; } // For expression_string() // Implement the basic operation - first the version for packets template typename enable_if::value,LType>::type operation(const LType& left, const RType& right) const { return adept::internal::fmax(left,right); } #ifndef ADEPT_CXX11_FEATURES // For C++98, use simple ternary operation template typename enable_if::value,typename promote::type>::type operation(const LType& left, const RType& right) const { return left < right ? right : left; } #else // For C++11 use the (hopefully faster) fmax function for floating-point functions template typename enable_if::value && (!is_floating_point::value || !is_floating_point::value), typename promote::type>::type operation(const LType& left, const RType& right) const { return left < right ? 
right : left; } template typename enable_if::value && (is_floating_point::value && is_floating_point::value), typename promote::type>::type operation(const LType& left, const RType& right) const { return std::fmax(left,right); } #endif // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { if (is_left(left,right,loc,scratch)) { left.template calc_gradient_(stack, loc, scratch); } } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { if (!is_left(left,right,loc,scratch)) { right.template calc_gradient_(stack, loc, scratch); } } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { if (is_left(left,right,loc,scratch)) { left.template calc_gradient_(stack, loc, scratch, multiplier); } } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { if (!is_left(left,right,loc,scratch)) { right.template calc_gradient_(stack, loc, scratch, multiplier); } } private: template bool is_left(const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { return left.template value_stored_(loc, scratch) > right.template value_stored_(loc, scratch); } }; // Policy class implementing function min struct Min { static const bool is_operator = false; // Operator or function for expression_string() static const int store_result = 0; // Do we need any scratch space? 
(this can be 0 or 1) static const bool is_vectorized = true; const char* operation_string() const { return "min"; } // For expression_string() // Implement the basic operation template typename enable_if::value,LType>::type operation(const LType& left, const RType& right) const { return adept::internal::fmin(left,right); } #ifndef ADEPT_CXX11_FEATURES // For C++98, use simple ternary operation template typename enable_if::value,typename promote::type>::type operation(const LType& left, const RType& right) const { return left < right ? left : right; } #else // For C++11 use the (hopefully faster) fmin function for floating-point functions template typename enable_if::value && (!is_floating_point::value || !is_floating_point::value), typename promote::type>::type operation(const LType& left, const RType& right) const { return left < right ? left : right; } template typename enable_if::value && (is_floating_point::value && is_floating_point::value), typename promote::type>::type operation(const LType& left, const RType& right) const { return std::fmin(left,right); } #endif // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { if (is_left(left,right,loc,scratch)) { left.template calc_gradient_(stack, loc, scratch); } } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { if (!is_left(left,right,loc,scratch)) { right.template calc_gradient_(stack, loc, scratch); } } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { if (is_left(left,right,loc,scratch)) { left.template calc_gradient_(stack, loc, scratch, multiplier); } } // Calculate the 
gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { if (!is_left(left,right,loc,scratch)) { right.template calc_gradient_(stack, loc, scratch, multiplier); } } private: template bool is_left(const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { return left.template value_stored_(loc, scratch) <= right.template value_stored_(loc, scratch); } }; // Policy class implementing copysign struct CopySign { static const bool is_operator = false; // Operator or function for expression_string() static const int store_result = 0; // Do we need any scratch space? static const bool is_vectorized = false; const char* operation_string() const { return "copysign"; } // For expression_string() // Implement the basic operation template typename promote::type operation(const LType& left, const RType& right) const { // Not very efficient but no guarantee that copysign function // is available, and also would need to check for // compatibility of left and right types. 
if (right >= 0) { return left; } else { return -left; } } // Calculate the gradient of the left-hand argument template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { if (is_right_positive(left,right,loc,scratch)) { left.template calc_gradient_(stack, loc, scratch); } else { left.template calc_gradient_(stack, loc, scratch, -1.0); } } // Calculate the gradient of the right-hand argument template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { // Do nothing: gradient of RHS is zero } // Calculate the gradient of the left-hand argument with a multiplier template void calc_left(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { if (is_right_positive(left,right,loc,scratch)) { left.template calc_gradient_(stack, loc, scratch, multiplier); } else { left.template calc_gradient_(stack, loc, scratch, -multiplier); } } // Calculate the gradient of the right-hand argument with a multiplier template void calc_right(Stack& stack, const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { // Do nothing: gradient of RHS is zero } private: template bool is_right_positive(const L& left, const R& right, const ExpressionSize& loc, const ScratchVector& scratch) const { return right.template value_stored_(loc, scratch) >= 0.0; } }; } // End namespace internal #define ADEPT_DEFINE_OPERATION(NAME, OPERATOR) \ template \ inline \ typename internal::enable_if::value, \ internal::BinaryOperation::type, \ L, internal:: NAME, R> >::type \ OPERATOR(const Expression& l, \ const Expression& r) { \ using namespace adept::internal; \ return BinaryOperation::type, \ L, NAME, R>(l.cast(), r.cast()); \ } \ \ template \ inline \ typename internal::enable_if::value, \ internal::BinaryOpScalarLeft::type, \ LType, 
internal:: NAME, R> >::type \ OPERATOR(const LType& l, const Expression& r) { \ using namespace adept::internal; \ return BinaryOpScalarLeft::type, \ LType, NAME, R>(l, r.cast()); \ } #define ADEPT_DEFINE_SCALAR_RHS_OPERATION(NAME, OPERATOR) \ template \ inline \ typename internal::enable_if::value, \ internal::BinaryOpScalarRight::type, \ L, internal:: NAME, RType> >::type \ OPERATOR(const Expression& l, const RType& r) { \ using namespace adept::internal; \ return BinaryOpScalarRight::type, \ L, NAME, RType>(l.cast(), r); \ } // The following define Expr*Expr and Scalar*Expr ADEPT_DEFINE_OPERATION(Add, operator+) ADEPT_DEFINE_OPERATION(Subtract, operator-) ADEPT_DEFINE_OPERATION(Multiply, operator*) ADEPT_DEFINE_OPERATION(Divide, operator/) ADEPT_DEFINE_OPERATION(Pow, pow) ADEPT_DEFINE_OPERATION(Atan2, atan2) ADEPT_DEFINE_OPERATION(Max, max) ADEPT_DEFINE_OPERATION(Min, min) // If std::max has been brought into scope via a "using" directive // then calling "max" with two arguments of the same type will call // the std::max rather than adept::max function, even if these // arguments are from the adept namespace. This will cause a compile // failure. Likewise with std::min. To avoid this, either don't use // "using std::max", or alternatively use Adept's "fmax" and "fmin" // functions, which do the same thing but match the C++11 functions // std::fmax and std::fmin for floating-point types. Note that you // can use these Adept functions even if you are not using C++11. ADEPT_DEFINE_OPERATION(Max, fmax) ADEPT_DEFINE_OPERATION(Min, fmin) ADEPT_DEFINE_OPERATION(CopySign, copysign) // The following define Expr*Scalar; those in the list above but not // below (e.g. 
Divide) use a custom implementation of Expr*Scalar ADEPT_DEFINE_SCALAR_RHS_OPERATION(Add, operator+) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Subtract, operator-) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Multiply, operator*) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Pow, pow) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Max, max) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Min, min) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Max, fmax) ADEPT_DEFINE_SCALAR_RHS_OPERATION(Min, fmin) ADEPT_DEFINE_SCALAR_RHS_OPERATION(CopySign, copysign) #undef ADEPT_DEFINE_OPERATION #undef ADEPT_DEFINE_SCALAR_RHS_OPERATION // Treat expression divided by floating-point scalar differently // since this can be changed to a more efficient multiplication template inline typename internal::enable_if::value && (internal::is_floating_point::value || L::is_active), internal::BinaryOpScalarRight::type, L, internal::Multiply, typename internal::promote::type> >::type operator/(const Expression& l, const RType& r) { using namespace adept::internal; typedef typename promote::type PType; return BinaryOpScalarRight(l.cast(), 1.0/static_cast(r)); } // Treat expression divided by any other type of scalar as division, // but differentiation is not properly implemented for dividing by a // scalar, so if the left hand side is active then the version above // (converting to a multiplication) will be used template inline typename internal::enable_if::value && (!internal::is_floating_point::value && !L::is_active), internal::BinaryOpScalarRight::type, L, internal::Divide, typename internal::promote::type> >::type operator/(const Expression& l, const RType& r) { using namespace adept::internal; typedef typename promote::type PType; return BinaryOpScalarRight(l.cast(), static_cast(r)); } // Now the operators returning boolean results #define ADEPT_DEFINE_OPERATOR(NAME, OPERATOR, OPSYMBOL, OPSTRING) \ namespace internal { \ struct NAME { \ static const bool is_operator = true; \ static const int store_result = 0; \ static const bool is_vectorized = false; \ 
const char* operation_string() const { return OPSTRING; } \ \ template \ bool operation(const LType& left, const RType& right) const \ { return left OPSYMBOL right; } \ }; \ } \ \ template \ inline \ typename internal::enable_if::value \ && (L::rank > 0 || R::rank > 0) , \ internal::BinaryOperation >::type \ OPERATOR(const Expression& l, \ const Expression& r) { \ using namespace adept::internal; \ return BinaryOperation(l.cast(), r.cast()); \ } \ \ template \ inline \ typename internal::enable_if::value \ && (R::rank > 0) , \ internal::BinaryOpScalarLeft >::type \ OPERATOR(const LType& l, const Expression& r) { \ using namespace adept::internal; \ return BinaryOpScalarLeft(l, r.cast()); \ } \ \ template \ inline \ typename internal::enable_if::value \ && (L::rank > 0), \ internal::BinaryOpScalarRight >::type \ OPERATOR(const Expression& l, const RType& r) { \ using namespace adept::internal; \ return BinaryOpScalarRight(l.cast(), r); \ } \ \ template \ inline \ typename internal::enable_if::type \ OPERATOR(const Expression& l, \ const Expression& r) { \ return l.scalar_value() OPSYMBOL r.scalar_value(); \ } \ \ template \ inline \ typename internal::enable_if::value \ && R::rank == 0, bool>::type \ OPERATOR(const LType& l, const Expression& r) { \ return l OPSYMBOL r.scalar_value(); \ } \ \ template \ inline \ typename internal::enable_if::value \ && L::rank == 0, bool>::type \ OPERATOR(const Expression& l, const RType& r) { \ return l.scalar_value() OPSYMBOL r; \ } // These return bool expressions when applied to expressions of rank // greater than zero ADEPT_DEFINE_OPERATOR(GreaterThan, operator>, >, " > ") ADEPT_DEFINE_OPERATOR(LessThan, operator<, <, " < ") ADEPT_DEFINE_OPERATOR(GreaterThanEqualTo, operator>=, >=, " >= ") ADEPT_DEFINE_OPERATOR(LessThanEqualTo, operator<=, <=, " <= ") ADEPT_DEFINE_OPERATOR(EqualTo, operator==, ==, " == ") ADEPT_DEFINE_OPERATOR(NotEqualTo, operator!=, !=, " != ") // These should only work on bool expressions 
ADEPT_DEFINE_OPERATOR(Or, operator||, ||, " || ") ADEPT_DEFINE_OPERATOR(And, operator&&, &&, " && ") #undef ADEPT_DEFINE_OPERATOR template inline typename internal::enable_if::type value(const Expression& r) { return r.scalar_value(); } } // End namespace adept #endif ================================================ FILE: include/adept/Expression.h ================================================ /* Expression.h -- Base class for arrays and active objects Copyright (C) 2014-2017 European Centre for Medium-Range Weather Forecasts Robin Hogan This file is part of the Adept library. */ #ifndef AdeptExpression_H #define AdeptExpression_H #include #include #include #include #include #include #include namespace adept { using internal::Packet; // --------------------------------------------------------------------- // SECTION 0: Forward declarations // --------------------------------------------------------------------- class Stack; // --------------------------------------------------------------------- // SECTION 1: Definition of Expression type // --------------------------------------------------------------------- // All types of expression derive from Expression. "A" is the // actual type of the expression (a use of the Curiously Recurring // Template Pattern). template struct Expression { // Static information about the expression public: typedef Type type; typedef Type value_type; // STL-style // There are several "static const" members in the derived // classes, some of which require fall-back values, defined here: // By default an expression is not vectorizable. 
static const bool is_vectorizable = false; // Classes derived from this one that do not define how many // scratch variables, active variables or arrays they contain are // assumed to need zero static const int n_scratch = 0; // Number of active variables in the expression (where each array // counts as 1), used to work out how much space must be reserved // on the operation stack static const int n_active = 0; // Is this an active expression? static const bool is_active = false; // Expressions cannot be lvalues by default static const bool is_lvalue = false; // The presence of _adept_expression_flag is used to define the // adept::is_not_expression trait typedef bool _adept_expression_flag; // Cast the expression to its true type, given by the template // argument const A& cast() const { return static_cast(*this); } // Return the dimensions of the expression template bool get_dimensions(ExpressionSize& dim) const { return cast().get_dimensions_(dim); } // Return a string representation of the expression std::string expression_string() const { return cast().expression_string_(); } Type value_with_len(Index j, Index len) const { ADEPT_STATIC_ASSERT(A::rank<=1, VALUE_WITH_LEN_ONLY_APPLICABLE_TO_ARRAYS_OF_RANK_0_OR_1); return cast().value_with_len_(j, len); } // These functions are for rank-0 expressions where there is no // indexing required Type scalar_value() const { ExpressionSize<0> dummy_index; return cast().template value_at_location_<0>(dummy_index); } // Return true if any memory in the expression lies between mem1 // and mem2: used to test for aliasing when doing assignment. bool is_aliased(const Type* mem1, const Type* mem2) const { return cast().is_aliased_(mem1, mem2); } // Return true if the fastest varying dimension of all the arrays // in the expression are contiguous and increasing. If so, we can // more simply increment their indices. 
bool all_arrays_contiguous() const {
  return cast().all_arrays_contiguous_();
}

// By default, arrays are contiguous (this fall-back used for
// objects that aren't arrays)
bool all_arrays_contiguous_() const { return true; }

// Are all the arrays in the expression aligned to a Packet
// boundary?
// NOTE(review): unlike all_arrays_contiguous(), which forwards to
// the underscore-suffixed all_arrays_contiguous_(), this forwards
// to cast().is_aligned() under the same name. If the derived type
// A does not declare its own is_aligned(), the call resolves back
// to this function and never terminates - TODO confirm against the
// derived classes whether is_aligned_() was intended.
bool is_aligned() const {
  return cast().is_aligned();
}

// In order to perform optimal vectorization, the first memory
// addresses of each inner dimension must be aligned
// appropriately, or they should all have the same offset so that
// this number of scalar operations can be performed at the start
// before beginning on vector instructions. This function returns
// the offset of the data in any arrays in the expression, or -1 if
// there is a clash in offsets.
int alignment_offset() const {
  int val = cast().template alignment_offset_::size>();
  if (val < Packet::size) {
    return val;
  }
  else {
    // Note that if an object returns val==Packet::size then
    // it indicates that alignment does not matter for this object
    return 0;
  }
}

// Fall-back position is that alignment doesn't matter for this
// object, which is encoded by returning n
template
int alignment_offset_() const { return n; }

// If the sub-expression is of a different type from that
// requested then we assume there must be no aliasing.
template
typename internal::enable_if::value, bool>::type
is_aliased(const MyType* mem1, const MyType* mem2) const {
  return false;
}

// Version of next_value_and_gradient for expressions that need no
// indexing: the value is computed with value_at_location_store_ so
// that intermediate results are kept in "scratch", then
// calc_gradient_ is called with the stack, and the value returned
Type scalar_value_and_gradient(Stack& stack) const {
  internal::ScratchVector scratch;
  ExpressionSize<0> dummy_index;
  Type val = cast().template value_at_location_store_<0,0>(dummy_index, scratch);
  cast().template calc_gradient_<0,0>(stack, dummy_index, scratch);
  return val;
}

// For each array in the expression use location "i" to return the
// memory index
template
void set_location(const ExpressionSize& i, ExpressionSize& index) const {
  cast().template set_location_<0>(i, index);
}

// Get the value at the specified location and move to the next
// location
template
Type next_value(ExpressionSize& index) const {
  Type val = cast().template value_at_location_<0>(index);
  cast().template advance_location_<0>(index);
  return val;
}

// If all arrays have an inner dimension that is contiguous
// and increasing then their indices may be incremented all
// together, which is more efficient
template
Type next_value_contiguous(ExpressionSize& index) const {
  Type val = cast().template value_at_location_<0>(index);
  ++index;
  return val;
}

// Get the packet at the specified location and step every index
// on by Packet::size
template
Packet next_packet(ExpressionSize& index) const {
  Packet val = cast().template packet_at_location_<0>(index);
  index += Packet::size;
  return val;
}

// Get the value at the specified location without moving on
template
Type value_at_location(ExpressionSize& index) const {
  return cast().template value_at_location_<0>(index);
}

// Move to the next location without returning a value
template
void advance_location(ExpressionSize& index) const {
  cast().template advance_location_<0>(index);
}

// Get the value at the specified location, calculate the gradient
// and move to the next location
template
Type next_value_and_gradient(Stack& stack, ExpressionSize& index) const {
  internal::ScratchVector scratch;
  Type val = cast().template value_at_location_store_<0,0>(index, scratch);
  cast().template calc_gradient_<0,0>(stack, index, scratch);
  cast().template advance_location_<0>(index);
  //++index;
  return val;
}

// As next_value_and_gradient, but the location is advanced by
// incrementing the whole index with ++index rather than by calling
// advance_location_
template
Type
next_value_and_gradient_contiguous(Stack& stack, ExpressionSize& index) const {
  internal::ScratchVector scratch;
  Type val = cast().template value_at_location_store_<0,0>(index, scratch);
  cast().template calc_gradient_<0,0>(stack, index, scratch);
  //cast().template advance_location_<0>(index);
  ++index;
  return val;
}

// This is used in product(): "multiplier" is passed on to
// calc_gradient_ unchanged
template
Type next_value_and_gradient_special(Stack& stack,
                                     ExpressionSize& index,
                                     const MyType& multiplier) const {
  internal::ScratchVector scratch;
  Type val = cast().template value_at_location_store_<0,0>(index, scratch);
  cast().template calc_gradient_<0,0>(stack, index, scratch, multiplier);
  cast().template advance_location_<0>(index);
  return val;
}

// This is used in norm2(): calc_gradient_ receives "multiplier"
// times the value just computed
template
Type next_value_and_gradient_special2(Stack& stack,
                                      ExpressionSize& index,
                                      const MyType& multiplier) const {
  internal::ScratchVector scratch;
  Type val = cast().template value_at_location_store_<0,0>(index, scratch);
  cast().template calc_gradient_<0,0>(stack, index, scratch, multiplier*val);
  cast().template advance_location_<0>(index);
  return val;
}

// Inaccessible methods
//  private:
//    Expression(const Expression&) { }

}; // End struct Expression

// ---------------------------------------------------------------------
// SECTION 2: Definition of Scalar type
// ---------------------------------------------------------------------

// Specific types of operation are in the adept::internal namespace
namespace internal {

  // SCALAR: an inactive rank-0 expression holding a single value;
  // every value query returns the stored value, packet queries
  // return a packet constructed from it, and all of the gradient
  // and location functions have empty bodies
  template
  struct Scalar : public Expression > {
    static const int  rank       = 0;
    static const int  n_scratch  = 0;
    static const int  n_active   = 0;
    static const int  n_arrays   = 0;
    static const bool is_active  = false;
    static const bool is_vectorizable = true;

    Scalar(const Type& value) : val_(value) { }

    // No dimensions to report for a rank-0 object; always succeeds
    bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }

    // The string representation is the stored value written to a
    // stringstream
    std::string expression_string_() const {
      std::stringstream s;
      s << val_;
      return s.str();
    }

    // Always reports no aliasing
    bool is_aliased_(const Type* mem1, const Type* mem2) const {
      return false;
    }

    Type value_with_len_(const Index& j, const Index& len) const {
      return val_;
    }

    template
    void advance_location_(ExpressionSize& loc) const { }

    template
    Type value_at_location_(const ExpressionSize& loc) const {
      return val_;
    }

    template
    Packet packet_at_location_(const ExpressionSize& loc) const {
      return Packet(val_);
    }

    // Nothing is written to "scratch"
    template
    Type value_at_location_store_(const ExpressionSize& loc,
                                  ScratchVector& scratch) const {
      return val_;
    }

    template
    Type value_stored_(const ExpressionSize& loc,
                       const ScratchVector& scratch) const {
      return val_;
    }

    template
    PacketType values_at_location_(const ExpressionSize& loc) const {
      return PacketType(val_);
    }

    template
    PacketType values_at_location_store_(const ExpressionSize& loc,
                                         ScratchVector& scratch) const {
      return PacketType(val_);
    }

    // The gradient functions below all have empty bodies
    template
    void calc_gradient_(Stack& stack, const ExpressionSize& loc,
                        const ScratchVector& scratch) const {}

    template
    void calc_gradient_(Stack& stack, const ExpressionSize& loc,
                        const ScratchVector& scratch,
                        const MyType& multiplier) const {}

    template
    void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc,
                               const ScratchVector >& scratch,
                               ScratchVector >& gradients) const {}

    template
    void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc,
                               const ScratchVector >& scratch,
                               ScratchVector >& gradients,
                               const MyType& multiplier) const {}

    template
    void set_location_(const ExpressionSize& i,
                       ExpressionSize& index) const {}

  protected:
    // The value returned by every value query above
    Type val_;

  };

  // ---------------------------------------------------------------------
  // SECTION 3. "expr_cast" helper
  // ---------------------------------------------------------------------

  // The following enables one of the static consts only in a
  // derived class of Expression to be extracted, and is useful when
  // you don't know whether a template argument to a function is an
  // Expression or a class derived from it. Thus
  // expr_cast >::is_vectorizable and
  // expr_cast::is_vectorizable would both return
  // Array::is_vectorizable.
template
struct expr_cast {
  // Rank of the array
  static const int  rank       = E::rank;
  // Number of scratch floating-point variables needed in the
  // expression, for example to store the result of a calculation
  // when it is needed again to compute the equivalent differential
  // statement
  static const int  n_scratch  = E::n_scratch;
  // Number of arrays within the expression; more specifically,
  // the number of indices required to store the location of an
  // element of the array
  static const int  n_arrays   = E::n_arrays;
  // Number of active terms in the expression
  static const int  n_active   = E::n_active;
  // Is this an array expression?
  static const bool is_array   = (E::rank > 0);
  // Is this an array expression with dimension of 2 or more?
  static const bool is_multidimensional = (E::rank > 1);
  // Is this an active expression?
  static const bool is_active  = E::is_active;
  // Is this expression actually an lvalue such as Array or
  // FixedArray?
  static const bool is_lvalue  = E::is_lvalue;
  // Is this expression vectorizable (conditional on a few extra
  // run-time checks)?
  static const bool is_vectorizable = E::is_vectorizable;
};

// Specialization exposing the same set of constants, each again
// taken from E
template
struct expr_cast > {
  static const int  rank       = E::rank;
  static const int  n_scratch  = E::n_scratch;
  static const int  n_arrays   = E::n_arrays;
  static const int  n_active   = E::n_active;
  static const bool is_array   = (E::rank > 0);
  static const bool is_multidimensional = (E::rank > 1);
  static const bool is_active  = E::is_active;
  static const bool is_lvalue  = E::is_lvalue;
  static const bool is_vectorizable = E::is_vectorizable;
};

} // End namespace internal

} // End namespace adept

#endif // AdeptExpression_H

================================================
FILE: include/adept/ExpressionSize.h
================================================
/* ExpressionSize.h -- Class for describing array sizes

    Copyright (C) 2014-2017 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
The ExpressionSize class is used to pass information between components of an expression on the dimensions (e.g. rows/columns, but works in any number of dimensions) of that part of an expression, and to check that the dimensions match. Since ExpressionSize objects can be used to index arrays, they may be useful to users and so are not placed in the "adept::internal" namespace. */ #ifndef AdeptExpressionSize_H #define AdeptExpressionSize_H #include #include #include #include namespace adept { // Definition of ExpressionSize class template class ExpressionSize { public: // Constructors ExpressionSize() { } // By default no initialization is done ExpressionSize(Index j) { if (j >= 0) { // Set all dimensions to the same value - usually 0 (empty // array) or 1 (scalar) set_all(j); } else { // Set just the first dimension to j; usually this would be // less than 0 to indicate an invalid expression dim[0] = j; } } ExpressionSize(Index j0, Index j1) { dim[0]=j0; dim[1]=j1; } ExpressionSize(Index j0, Index j1, Index j2) { dim[0]=j0; dim[1]=j1; dim[2]=j2; } ExpressionSize(Index j0, Index j1, Index j2, Index j3) { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; } ExpressionSize(Index j0, Index j1, Index j2, Index j3, Index j4) { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; dim[4]=j4; } ExpressionSize(Index j0, Index j1, Index j2, Index j3, Index j4, Index j5) { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; dim[4]=j4; dim[5]=j5; } ExpressionSize(Index j0, Index j1, Index j2, Index j3, Index j4, Index j5, Index j6) { dim[0]=j0; dim[1]=j1; dim[2]=j2; dim[3]=j3; dim[4]=j4; dim[5]=j5; dim[6]=j6; } // Assume copy constructor will copy elements of dim // An "invalid" expression is one involving a mismatch of array // sizes, and is conveyed by a negative first element bool invalid_expression() const { return (dim[0] < 0); } // Set all to specified value void set_all(Index j) { for (int i = 0; i < Rank; ++i) { dim[i] = j; } } // Copy from an ExpressionSize object of the same rank void 
copy(const ExpressionSize& d) { for (int i = 0; i < Rank; ++i) { dim[i] = d[i]; } } // ...or pointer to raw data void copy(const Index* d) { for (int i = 0; i < Rank; ++i) { dim[i] = d[i]; } } // Copy dissimilar ExpressionSize object, filling the remaining // dimensions with 1 template void copy_dissimilar(const ExpressionSize& d) { int rank = MyRank > Rank ? Rank : MyRank; for (int i = 0; i < rank; ++i) { dim[i] = d[i]; } for (int i = rank; i < Rank; ++i) { dim[i] = 1; } } // String representation std::string str() const { std::stringstream s; s << "[" << dim[0]; for (int i = 1; i < Rank; ++i) { s << "," << dim[i]; } s << "]"; return s.str(); } // Get the total number of elements Index size() const { Index prod; if (Rank == 0) { prod = 1; } else { prod = dim[0]; for (int i = 1; i < Rank; ++i) { prod *= dim[i]; } } return prod; } ExpressionSize& operator++() { for (int i = 0; i < Rank; ++i) { ++dim[i]; } return *this; } ExpressionSize& operator+=(Index inc) { for (int i = 0; i < Rank; ++i) { dim[i] += inc; } return *this; } bool operator==(const ExpressionSize& rhs) const { for (int i = 0; i < Rank; i++) { if (dim[i] != rhs[i]) { return false; } } return true; } bool operator!=(const ExpressionSize& rhs) const { return !(*this == rhs); } #ifdef ADEPT_MOVE_SEMANTICS friend void swap(ExpressionSize& l, ExpressionSize& r) noexcept { for (int i = 0; i < Rank; ++i) { Index tmp = l.dim[i]; l.dim[i] = r.dim[i]; r.dim[i] = tmp; } } #endif // Const and non-const access to elements Index& operator[](int i) { return dim[i]; } const Index& operator[](int i) const { return dim[i]; } private: Index dim[Rank]; }; // Specialization for scalars (zero-rank arrays) known at compile // time template <> class ExpressionSize<0> { public: ExpressionSize() { } ExpressionSize(Index j) { } bool invalid_expression() const { return false; } std::string str() const { return ""; } void set_all(Index) const { } bool operator==(const ExpressionSize<0>&) const { return true; } bool 
operator!=(const ExpressionSize<0>&) const { return false; } bool operator[](int) const { return 0; } template void copy_dissimilar(const ExpressionSize&) { } }; // Send the size of an expression to a stream template inline std::ostream& operator<<(std::ostream& os, const ExpressionSize& s) { if (Rank > 0) { os << "(" << s[0]; for (int i = 1; i < Rank; i++) { os << "," << s[i]; } return os << ")"; } } namespace internal { // The following are only used within the Adept library // Check whether the size of one expression is compatible with // that of another for arithmetic operations: this is "true" if // the rank is the same and the dimensions match, or if one of the // expressions is a scalar (zero rank). If the ranks don't match // and neither is zero then the program won't compile. template inline typename enable_if1), bool>::type compatible(const ExpressionSize& l, const ExpressionSize& r) { bool result = (l[0] == r[0]); for (int i = 1; i < RRank; ++i) { result = result && (l[i] == r[i]); } return result; } template inline typename enable_if::type compatible(const ExpressionSize& l, const ExpressionSize& r) { return l[0] == r[0]; } template inline typename enable_if::type compatible(const ExpressionSize& l, const ExpressionSize& r) { return true; } // Return an ExpressionSize object of specified rank that expresses // an invalid expression template inline ExpressionSize invalid_expression_size() { return ExpressionSize(-1); } } // End namespace internal // Deprecated inline ExpressionSize<1> expression_size(Index j0) { return ExpressionSize<1>(j0); } inline ExpressionSize<2> expression_size(Index j0, Index j1) { return ExpressionSize<2>(j0, j1); } // Use this instead inline ExpressionSize<1> dimensions(Index j0) { return ExpressionSize<1>(j0); } inline ExpressionSize<2> dimensions(Index j0, Index j1) { return ExpressionSize<2>(j0, j1); } inline ExpressionSize<3> dimensions(Index j0, Index j1, Index j2) { return ExpressionSize<3>(j0, j1, j2); } inline 
ExpressionSize<4> dimensions(Index j0, Index j1, Index j2, Index j3) { return ExpressionSize<4>(j0, j1, j2, j3); } inline ExpressionSize<5> dimensions(Index j0, Index j1, Index j2, Index j3, Index j4) { return ExpressionSize<5>(j0, j1, j2, j3, j4); } inline ExpressionSize<6> dimensions(Index j0, Index j1, Index j2, Index j3, Index j4, Index j5) { return ExpressionSize<6>(j0, j1, j2, j3, j4, j5); } inline ExpressionSize<7> dimensions(Index j0, Index j1, Index j2, Index j3, Index j4, Index j5, Index j6) { return ExpressionSize<7>(j0, j1, j2, j3, j4, j5, j6); } } // End namespace adept #endif // AdeptExpressionSize_H ================================================ FILE: include/adept/FixedArray.h ================================================ /* FixedArray.h -- active or inactive FixedArray of arbitrary rank Copyright (C) 2014-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. The FixedArray class has functionality modelled on Fortran-90 arrays - they can have a rank up to 7 (above will work, but some forms of indexing these arrays will not work). 
*/ #ifndef AdeptFixedArray_H #define AdeptFixedArray_H 1 #include #include #include #include #include #include namespace adept { namespace internal { // ------------------------------------------------------------------- // Helper classes // ------------------------------------------------------------------- // The following are used by expression_string() template struct fixed_array_helper { const char* name() { return "FixedArray"; } }; template struct fixed_array_helper { const char* name() { return "aFixedArray"; } }; template <> struct fixed_array_helper<1,false> { const char* name() { return "FixedVector"; } }; template <> struct fixed_array_helper<1,true> { const char* name() { return "aFixedVector"; } }; template <> struct fixed_array_helper<2,false> { const char* name() { return "FixedMatrix"; } }; template <> struct fixed_array_helper<2,true> { const char* name() { return "aFixedMatrix"; } }; template struct fixed_array { static const int rank = (J0>0) * (1 + (J1>0) * (1 + (J2>0) * (1 + (J3>0) * (1 + (J4>0) * (1 + (J5>0) * (1 + (J6>0))))))); static const Index length = (J0 + (J0<1)) * (J1 + (J1<1)) * (J2 + (J2<1)) * (J3 + (J3<1)) * (J4 + (J4<1)) * (J5 + (J5<1)) * (J6 + (J6<1)); }; } // End namespace internal // ------------------------------------------------------------------- // Definition of FixedArray class // ------------------------------------------------------------------- template class FixedArray : public Expression >, protected internal::GradientIndex { public: // ------------------------------------------------------------------- // FixedArray: 1. 
Static Definitions // ------------------------------------------------------------------- // The Expression base class needs access to some protected member // functions in section 5 friend struct Expression >; // Static definitions to enable the properties of this type of // expression to be discerned at compile time static const bool is_active = IsActive; static const bool is_lvalue = true; static const int rank = internal::fixed_array::rank; static const int length_ = internal::fixed_array::length; static const int n_active = IsActive * (1 + internal::is_complex::value); static const int n_scratch = 0; static const int n_arrays = 1; static const bool is_vectorizable = Packet::is_vectorized; protected: template struct dimension_alias { }; template struct dimension_alias<0,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X0; }; template struct dimension_alias<1,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X1; }; template struct dimension_alias<2,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X2; }; template struct dimension_alias<3,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X3; }; template struct dimension_alias<4,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X4; }; template struct dimension_alias<5,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X5; }; template struct dimension_alias<6,X0,X1,X2,X3,X4,X5,X6> { static const Index value = X6; }; public: template struct dimension_ { static const int value = dimension_alias::value; }; template struct offset_helper { static const Index value = // Dim == Rank-1 ? 
1 : dimension_::value*offset_helper::value; }; template struct offset_helper<1,Dim> { static const Index value = 1; }; template struct offset_helper<0,Dim> { static const Index value = 1; }; template struct offset_helper<-1,Dim> { static const Index value = 1; }; template struct offset_helper<-2,Dim> { static const Index value = 1; }; template struct offset_helper<-3,Dim> { static const Index value = 1; }; template struct offset_helper<-4,Dim> { static const Index value = 1; }; template struct offset_helper<-5,Dim> { static const Index value = 1; }; template struct offset_ { static const Index value = offset_helper::value; }; // ------------------------------------------------------------------- // FixedArray: 2. Constructors // ------------------------------------------------------------------- // Initialize an empty array FixedArray() : internal::GradientIndex(length_, false) { ADEPT_STATIC_ASSERT(!(std::numeric_limits::is_integer && IsActive), CANNOT_CREATE_ACTIVE_FIXED_ARRAY_OF_INTEGERS); #ifdef ADEPT_REAL_INIT initialize(); #endif } #ifdef ADEPT_REAL_INIT private: // Initialize to zero, NaN or whatever for debugging template typename internal::enable_if::value, void>::type initialize() { for (int i = 0; i < length_; ++i) { data_[i] = ADEPT_INIT_REAL; } } template typename internal::enable_if::value, void>::type initialize() { for (int i = 0; i < length_; ++i) { #ifdef ADEPT_INIT_REAL_SNAN data_[i] = std::complex( std::numeric_limits::signaling_NaN(), std::numeric_limits::signaling_NaN()); #else data_[i] = std::complex(ADEPT_INIT_REAL, ADEPT_INIT_REAL); #endif } } // Dummy initialize for non-floats template typename internal::enable_if::value && !internal::is_complex::value, void>::type initialize() { } public: #endif // Copy constructor copies the data, unlike in the Array class FixedArray(const FixedArray& rhs) : internal::GradientIndex(length_, false) { *this = rhs; } public: // Initialize with an expression on the right hand side by // evaluating the 
expression, requiring the ranks to be equal. // Note that this constructor enables expressions to be used as // arguments to functions that expect an array - to prevent this // implicit conversion, use the "explicit" keyword. template FixedArray(const Expression& rhs, typename internal::enable_if::type = 0) : internal::GradientIndex(length_, false) { *this = rhs; } #ifdef ADEPT_CXX11_FEATURES // Initialize from initializer list template FixedArray(std::initializer_list list) : internal::GradientIndex(length_,false) { *this = list; } // The unfortunate restrictions on initializer_list constructors // mean that each possible Array rank needs explicit treatment template FixedArray(std::initializer_list< std::initializer_list > list) : internal::GradientIndex(length_,false) { *this = list; } template FixedArray(std::initializer_list< std::initializer_list< std::initializer_list > > list) : internal::GradientIndex(length_,false) { *this = list; } template FixedArray(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > list) : internal::GradientIndex(length_,false) { *this = list; } template FixedArray(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > > list) : internal::GradientIndex(length_,false) { *this = list; } template FixedArray(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > > > list) : internal::GradientIndex(length_,false) { *this = list; } template FixedArray(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > > > > list) : internal::GradientIndex(length_,false) { *this = list; } #endif // Destructor: if the data are stored in a Storage object then we // tell it that one fewer object is linking to it; if the number // of links to it drops to 
zero, it will destruct itself and // deallocate the memory. ~FixedArray() { internal::GradientIndex::unregister(length_); } // ------------------------------------------------------------------- // FixedArray: 3. Assignment operators // ------------------------------------------------------------------- // Assignment to another matrix: copy the data... // Ideally we would like this to fall back to the operator=(const // Expression&) function, but if we don't define a copy assignment // operator then C++ will generate a default one :-( FixedArray& operator=(const FixedArray& rhs) { *this = static_cast&> (rhs); return *this; } // Assignment to an array expression of the same rank template typename internal::enable_if::type inline operator=(const Expression& rhs) { #ifndef ADEPT_NO_DIMENSION_CHECKING ExpressionSize dims; if (!rhs.get_dimensions(dims)) { std::string str = "FixedArray size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (!internal::compatible(dims, dimensions())) { std::string str = "Expr"; str += dims.str() + " object assigned to " + expression_string_(); throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } #endif // Select active/passive version by delegating to a protected // function assign_expression_(rhs); return *this; } // Assignment to a single value copies to every element template typename internal::enable_if::value, FixedArray&>::type operator=(RType rhs) { assign_inactive_scalar_(rhs); return *this; } // Assign active scalar expression to an active array by first // converting the RHS to an active scalar template typename internal::enable_if 0) && IsActive && !E::is_lvalue, FixedArray&>::type operator=(const Expression& rhs) { Active x = rhs; *this = x; return *this; } // Assign an active scalar to an active array template FixedArray& operator=(const Active& rhs) { ADEPT_STATIC_ASSERT(IsActive, ATTEMPT_TO_ASSIGN_ACTIVE_SCALAR_TO_INACTIVE_FIXED_ARRAY); #ifdef 
ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar_(rhs.scalar_value()); return *this; } #endif // In case PType != Type we make a local copy to minimize type // conversions Type val = rhs.scalar_value(); ADEPT_ACTIVE_STACK->check_space(length_); for (Index i = 0; i < length_; ++i) { data_[i] = val; ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index()); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+i); } return *this; } #define ADEPT_DEFINE_OPERATOR(OPERATOR, OPSYMBOL) \ template \ FixedArray& OPERATOR(const RType& rhs) { \ return *this = noalias(*this OPSYMBOL rhs); \ } ADEPT_DEFINE_OPERATOR(operator+=, +) ADEPT_DEFINE_OPERATOR(operator-=, -) ADEPT_DEFINE_OPERATOR(operator*=, *) ADEPT_DEFINE_OPERATOR(operator/=, /) // ADEPT_DEFINE_OPERATOR(operator&=, &); // ADEPT_DEFINE_OPERATOR(operator|=, |); #undef ADEPT_DEFINE_OPERATOR // Enable the A.where(B) = C construct. // Firstly implement the A.where(B) to return a "Where" object template typename internal::enable_if >::type where(const Expression& bool_expr) { #ifndef ADEPT_NO_DIMENSION_CHECKING ExpressionSize dims; if (!bool_expr.get_dimensions(dims)) { std::string str = "FixedArray size mismatch in " + bool_expr.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (dims != dimensions()) { throw size_mismatch("Boolean expression of different size" ADEPT_EXCEPTION_LOCATION); } #endif return internal::Where(*this, bool_expr.cast()); } // When Where = C is invoked, it calls // A.assign_conditional(B,C). This is implemented separately for // the case when C is an inactive scalar and when it is an array // expression. 
template typename internal::enable_if::value, void>::type assign_conditional(const Expression& bool_expr, C rhs) { if (!empty()) { assign_conditional_inactive_scalar_(bool_expr, rhs); } } template void assign_conditional(const Expression& bool_expr, const Expression& rhs) { #ifndef ADEPT_NO_DIMENSION_CHECKING // Assume size of bool_expr already checked ExpressionSize dims; if (!rhs.get_dimensions(dims)) { std::string str = "FixedArray size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (!internal::compatible(dims,dimensions())) { throw size_mismatch("Right-hand-side of \"where\" construct of incompatible size" ADEPT_EXCEPTION_LOCATION); } #endif // Select active/passive version by delegating to a // protected function assign_conditional_(bool_expr.cast(), rhs.cast()); // return *this; } #ifdef ADEPT_CXX11_FEATURES // Assignment of a FixedArray to an initializer list; the first ought // to only work for vectors template typename internal::enable_if::value, FixedArray&>::type operator=(std::initializer_list list) { ADEPT_STATIC_ASSERT(rank==1, RANK_MISMATCH_IN_INITIALIZER_LIST); if (list.size() > J0) { throw size_mismatch("Initializer list is larger than Vector in assignment" ADEPT_EXCEPTION_LOCATION); } // Zero the whole array first in order that automatic // differentiation works *this = 0; Index index = 0; for (auto i = std::begin(list); i < std::end(list); ++i, ++index) { data_[index*offset_<0>::value] = *i; } return *this; } // Assignment of a higher rank Array to a list of lists... 
template FixedArray& operator=(std::initializer_list > list) { ADEPT_STATIC_ASSERT(rank==internal::initializer_list_rank::value+2, RANK_MISMATCH_IN_INITIALIZER_LIST); if (list.size() > J0) { throw size_mismatch("Multi-dimensional initializer list larger than slowest-varying dimension of Array" ADEPT_EXCEPTION_LOCATION); } // Zero the whole array first in order that automatic // differentiation works *this = 0; // Enact the assignment using the Array version inactive_link() = list; return *this; } #endif // ------------------------------------------------------------------- // FixedArray: 4. Access functions, particularly operator() // ------------------------------------------------------------------- // Get l-value of the element at the specified coordinates typename internal::active_reference::type get_lvalue(const ExpressionSize& i) { return get_lvalue_(index_(i)); } typename internal::active_scalar::type get_rvalue(const ExpressionSize& i) const { return get_rvalue_(index_(i)); } protected: template typename internal::enable_if >::type get_lvalue_(const Index& loc) { return ActiveReference(data_[loc], gradient_index()+loc); } template typename internal::enable_if::type get_lvalue_(const Index& loc) { return data_[loc]; } template typename internal::enable_if >::type get_rvalue_(const Index& loc) const { return Active(data_[loc], gradient_index()+loc); } template typename internal::enable_if::type get_rvalue_(const Index& loc) const { return data_[loc]; } public: // Get a constant reference to the element at the specified // location, ignoring whether it is active or not // const Type& get(const ExpressionSize& i) const { // return data_[index_(i)]; // } // The following provide a way to access individual elements of // the array. There must be the same number of arguments to // operator() as the rank of the array. Each argument must be of // integer type, or a rank-0 expression of integer type (such as // "end" or "end-3"). 
Inactive arrays return a reference to the // element, while active arrays return an ActiveReference // object. Up to 7 dimensions are supported. // l-value access to inactive array with function-call operator template typename internal::enable_if::value && !IsActive, Type&>::type operator()(I0 i0) { return data_[internal::get_index_with_len(i0,J0)]; } // r-value access to inactive array with function-call operator template typename internal::enable_if::value && !IsActive, const Type&>::type operator()(I0 i0) const { return data_[internal::get_index_with_len(i0,J0)]; } // l-value access to inactive array with element-access operator template typename internal::enable_if::value && !IsActive, Type&>::type operator[](I0 i0) { return data_[internal::get_index_with_len(i0,J0)]; } // r-value access to inactive array with element-access operator template typename internal::enable_if::value && !IsActive, const Type&>::type operator[](I0 i0) const { return data_[internal::get_index_with_len(i0,J0)]; } protected: template typename internal::enable_if::type get_scalar_reference(const Index& offset) { return data_[offset]; } template typename internal::enable_if::type get_scalar_reference(const Index& offset) const { return data_[offset]; } template typename internal::enable_if >::type get_scalar_reference(const Index& offset) { return ActiveReference(data_[offset], gradient_index()+offset); } template typename internal::enable_if >::type get_scalar_reference(const Index& offset) const { return ActiveConstReference(data_[offset], gradient_index()+offset); } public: // l-value access to active array with function-call operator template typename internal::enable_if::value && IsActive, ActiveReference >::type operator()(I0 i0) { Index offset = internal::get_index_with_len(i0,J0); return ActiveReference(data_[offset], gradient_index()+offset); } // r-value access to active array with function-call operator template typename internal::enable_if::value && IsActive, 
ActiveConstReference >::type operator()(I0 i0) const { Index offset = internal::get_index_with_len(i0,J0); return ActiveConstReference(data_[offset], gradient_index()+offset); } // l-value access to active array with element-access operator template typename internal::enable_if::value && IsActive, ActiveReference >::type operator[](I0 i0) { Index offset = internal::get_index_with_len(i0,J0); return ActiveReference(data_[offset], gradient_index()+offset); } // r-value access to active array with element-access operator template typename internal::enable_if::value && IsActive, ActiveConstReference >::type operator[](I0 i0) const { Index offset = internal::get_index_with_len(i0,J0); return ActiveConstReference(data_[offset], gradient_index()+offset); } // 2D array l-value and r-value access template typename internal::enable_if::value, typename internal::active_reference::type>::type operator()(I0 i0, I1 i1) { return get_scalar_reference( internal::get_index_with_len(i0,J0)*J1 + internal::get_index_with_len(i1,J1)); } template typename internal::enable_if::value, typename internal::active_const_reference::type>::type operator()(I0 i0, I1 i1) const { return get_scalar_reference( internal::get_index_with_len(i0,J0)*J1 + internal::get_index_with_len(i1,J1)); } // 3D array l-value and r-value access template typename internal::enable_if::value, typename internal::active_reference::type>::type operator()(I0 i0, I1 i1, I2 i2) { return get_scalar_reference(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)); } template typename internal::enable_if::value, typename internal::active_const_reference::type>::type operator()(I0 i0, I1 i1, I2 i2) const { return get_scalar_reference(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)); } // 4D array l-value and r-value access template typename internal::enable_if::value, typename 
internal::active_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3) { return get_scalar_reference(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)); } template typename internal::enable_if::value, typename internal::active_const_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3) const { return get_scalar_reference(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)); } // 5D array l-value and r-value access template typename internal::enable_if::value, typename internal::active_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) { return get_scalar_reference(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)) + internal::get_index_with_len(i4,J4)); } template typename internal::enable_if::value, typename internal::active_const_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const { return get_scalar_reference(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)) + internal::get_index_with_len(i4,J4)); } // 6D array l-value and r-value access template typename internal::enable_if::value, typename internal::active_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) { return get_scalar_reference(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)) + internal::get_index_with_len(i4,J4)) + internal::get_index_with_len(i5,J5)); } template typename internal::enable_if::value, typename 
internal::active_const_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const { return get_scalar_reference(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)) + internal::get_index_with_len(i4,J4)) + internal::get_index_with_len(i5,J5)); } // 7D array l-value and r-value access template typename internal::enable_if::value, typename internal::active_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) { return get_scalar_reference(J6*(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)) + internal::get_index_with_len(i4,J4)) + internal::get_index_with_len(i5,J5)) + internal::get_index_with_len(i6,J6)); } template typename internal::enable_if::value, typename internal::active_const_reference::type>::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const { return get_scalar_reference(J6*(J5*(J4*(J3*(J2*(J1*internal::get_index_with_len(i0,J0) + internal::get_index_with_len(i1,J1)) + internal::get_index_with_len(i2,J2)) + internal::get_index_with_len(i3,J3)) + internal::get_index_with_len(i4,J4)) + internal::get_index_with_len(i5,J5)) + internal::get_index_with_len(i6,J6)); } // The following define the case when operator() is called and one // of the arguments is a "range" object (an object that describes // a range of indices that are either contiguous or separated by a // fixed stride), while all others are of integer type (or a // rank-0 expression of integer type). An Array object is returned // with a rank that may be reduced from that of the original // array, by one for each dimension that was indexed by an // integer. The new array points to a subset of the original data, // so modifying it will modify the original array. 
// First the case of a vector where we know the argument must be a // "range" object template typename internal::enable_if::value, Array<1,Type,IsActive> >::type operator()(I0 i0) { ExpressionSize<1> new_dim((i0.end(J0) + i0.stride(J0) - i0.begin(J0)) /i0.stride(J0)); ExpressionSize<1> new_offset(i0.stride(J0)); return Array<1,Type,IsActive>(data_, i0.begin(J0), new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array<1,Type,IsActive> >::type operator()(I0 i0) const { ExpressionSize<1> new_dim((i0.end(J0) + i0.stride(J0) - i0.begin(J0)) /i0.stride(J0)); ExpressionSize<1> new_offset(i0.stride(J0)); return Array<1,Type,IsActive>(data_, i0.begin(J0), new_dim, new_offset, internal::GradientIndex::get()); } private: // For multi-dimensional arrays, we need a helper function // Treat the indexing of dimension "irank" in the case that the // index is of integer type template typename internal::enable_if::value, void>::type update_index(const T& i, Index& inew_rank, Index& ibegin, ExpressionSize& new_dim, ExpressionSize& new_offset) const { ibegin += internal::get_index_with_len(i,dimension_::value)*offset_::value; } // Treat the indexing of dimension "irank" in the case that the // index is a "range" object template typename internal::enable_if::value, void>::type update_index(const T& i, Index& inew_rank, Index& ibegin, ExpressionSize& new_dim, ExpressionSize& new_offset) const { ibegin += i.begin(dimension_::value)*offset_::value; new_dim[inew_rank] = (i.end(dimension_::value) + i.stride(dimension_::value)-i.begin(dimension_::value)) / i.stride(dimension_::value); new_offset[inew_rank] = i.stride(dimension_::value)*offset_::value; ++inew_rank; } public: // Now the individual overloads for each number of arguments, up // to 7, with separate r-value (const) and l-value (non-const) // versions template typename internal::enable_if::value, Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1) { static const int 
new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1) const { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2) { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2) const { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3) 
{ static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3) const { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4) const { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; 
Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset); update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5) const { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset); update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, Array::count,Type,IsActive> >::type 
operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset); update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset); update_index<6>(i6, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } template typename internal::enable_if::value, const Array::count,Type,IsActive> >::type operator()(I0 i0, I1 i1, I2 i2, I3 i3, I4 i4, I5 i5, I6 i6) const { static const int new_rank = internal::is_ranged::count; ExpressionSize new_dim; ExpressionSize new_offset; Index inew_rank = 0; Index ibegin = 0; update_index<0>(i0, inew_rank, ibegin, new_dim, new_offset); update_index<1>(i1, inew_rank, ibegin, new_dim, new_offset); update_index<2>(i2, inew_rank, ibegin, new_dim, new_offset); update_index<3>(i3, inew_rank, ibegin, new_dim, new_offset); update_index<4>(i4, inew_rank, ibegin, new_dim, new_offset); update_index<5>(i5, inew_rank, ibegin, new_dim, new_offset); update_index<6>(i6, inew_rank, ibegin, new_dim, new_offset); return Array(data_, ibegin, new_dim, new_offset, internal::GradientIndex::get()); } // If one or more of the indices is not guaranteed to be monotonic // at compile time then we must return an IndexedArray, now done // for all possible numbers of arguments // Indexing a 1D array template typename internal::enable_if::value && !internal::is_ranged::value, internal::IndexedArray >::type operator()(const I0& i0) { return internal::IndexedArray(*this, i0); } template typename internal::enable_if::value && !internal::is_ranged::value, const 
internal::IndexedArray >::type operator()(const I0& i0) const { return internal::IndexedArray(*const_cast(this), i0); } // Indexing a 2D array template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1> >::type operator()(const I0& i0, const I1& i1) { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*this, i0, i1); } template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1> >::type operator()(const I0& i0, const I1& i1) const { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*const_cast(this), i0, i1); } // Indexing a 3D array template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2> >::type operator()(const I0& i0, const I1& i1, const I2& i2) { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*this, i0, i1, i2); } template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2> >::type operator()(const I0& i0, const I1& i1, const I2& i2) const { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*const_cast(this), i0, i1, i2); } // Indexing a 4D array template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*this, i0, i1, i2, i3); } template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3) const { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*const_cast(this), i0, i1, i2, i3); 
} // Indexing a 5D array template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3,I4> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4) { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*this, i0, i1, i2, i3, i4); } template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3,I4> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4) const { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*const_cast(this), i0, i1, i2, i3, i4); } // Indexing a 6D array template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5) { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*this,i0,i1,i2,i3,i4,i5); } template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5) const { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*const_cast(this),i0,i1,i2,i3,i4,i5); } // Indexing a 7D array template typename internal::enable_if::value, internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5,I6> >::type operator()(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6) { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*this,i0,i1,i2,i3,i4,i5,i6); } template typename internal::enable_if::value, const internal::IndexedArray::count, Type,IsActive,FixedArray,I0,I1,I2,I3,I4,I5,I6> >::type operator()(const 
I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, const I5& i5, const I6& i6) const { static const int new_rank = internal::is_irreg_indexed::count; return internal::IndexedArray(*const_cast(this),i0,i1,i2,i3,i4,i5,i6); } // Provide a C-array-like array access: for a multidimensional // array, operator[](i), where i is of integer type, returns an // array of rank one less than the original array, where the new // array is "sliced" at index i of dimension 0. For a vector, // operator[](i) returns an l-value to the element at i. Thus for // a 3D array A, A[1][2][3] returns a single element. Note that // this will be slower than A(1,2,3) because each operator[] // creates a new array (although does not copy the data). template typename internal::enable_if::value && (rank > 1), Array >::type operator[](T i) { int index = internal::get_index_with_len(i,J0)*offset_<0>::value; ExpressionSize new_dim; ExpressionSize new_offset; ExpressionSize dims = dimensions(); ExpressionSize offs = offset(); for (int j = 1; j < rank; ++j) { new_dim[j-1] = dims[j]; new_offset[j-1] = offs[j]; } return Array(data_, index, new_dim, new_offset, internal::GradientIndex::get()); } // diag_matrix(), where *this is a 1D array, returns a DiagMatrix // containing the data as the diagonal pointing to the original // data, Can be used as an lvalue. 
Defined in SpecialMatrix.h SpecialMatrix, IsActive> diag_matrix(); Array<1,Type,IsActive> diag_vector(Index offdiag = 0) { ADEPT_STATIC_ASSERT(rank == 2, DIAG_VECTOR_ONLY_WORKS_ON_SQUARE_MATRICES); if (empty()) { // Return an empty vector return Array<1,Type,IsActive>(); } else if (J0 != J1) { throw invalid_operation("diag_vector member function only applicable to square matrices" ADEPT_EXCEPTION_LOCATION); } else if (offdiag >= 0) { Index new_dim = std::min(J0, J1-offdiag); return Array<1,Type,IsActive>(data_, offset_<1>::value*offdiag, ExpressionSize<1>(new_dim), ExpressionSize<1>(offset_<0>::value+offset_<1>::value), internal::GradientIndex::get()); } else { Index new_dim = std::min(J0+offdiag, J1); return Array<1,Type,IsActive>(data_,-offset_<0>::value*offdiag, ExpressionSize<1>(new_dim), ExpressionSize<1>(offset_<0>::value+offset_<1>::value), internal::GradientIndex::get()); } } Array<2,Type,IsActive> submatrix_on_diagonal(Index ibegin, Index iend) { ADEPT_STATIC_ASSERT(rank == 2, SUBMATRIX_ON_DIAGONAL_ONLY_WORKS_ON_SQUARE_MATRICES); if (J0 != J1) { throw invalid_operation("submatrix_on_diagonal member function only applicable to square matrices" ADEPT_EXCEPTION_LOCATION); } else if (ibegin < 0 || ibegin > iend || iend >= J0) { throw index_out_of_bounds("Dimensions out of range in submatrix_on_diagonal" ADEPT_EXCEPTION_LOCATION); } else { Index len = iend-ibegin+1; ExpressionSize<2> dim(len,len); return Array<2,Type,IsActive>(data_, ibegin*(offset_<0>::value + offset_<1>::value), dim, offset(), internal::GradientIndex::get()); } } // For extracting contiguous sections out of an array use the // following. Currently this just indexes each dimension with the // contiguous range(a,b) index, but in future it may be optimized. 
// 1D array subset. Every subset overload below follows one pattern: it statically asserts that rank matches the number of (begin,end) pairs supplied and returns (*this) indexed with one range(ibeginN,iendN) per dimension; a non-const and a const overload are provided for each rank from 1 to 7 template Array<1,Type,IsActive> subset(const B0& ibegin0, const E0& iend0) { ADEPT_STATIC_ASSERT(rank == 1, SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY); return (*this)(range(ibegin0,iend0)); } template const Array<1,Type,IsActive> subset(const B0& ibegin0, const E0& iend0) const { ADEPT_STATIC_ASSERT(rank == 1, SUBSET_WITH_2_ARGS_ONLY_ON_RANK_1_ARRAY); return (*this)(range(ibegin0,iend0)); } // 2D array subset template Array<2,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1) { ADEPT_STATIC_ASSERT(rank == 2, SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1)); } template const Array<2,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1) const { ADEPT_STATIC_ASSERT(rank == 2, SUBSET_WITH_4_ARGS_ONLY_ON_RANK_2_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1)); } // 3D array subset template Array<3,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2) { ADEPT_STATIC_ASSERT(rank == 3, SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2)); } template const Array<3,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2) const { ADEPT_STATIC_ASSERT(rank == 3, SUBSET_WITH_6_ARGS_ONLY_ON_RANK_3_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2)); } // 4D array subset template Array<4,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3) { ADEPT_STATIC_ASSERT(rank == 4, SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3)); } template const Array<4,Type,IsActive> 
subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3) const { ADEPT_STATIC_ASSERT(rank == 4, SUBSET_WITH_8_ARGS_ONLY_ON_RANK_4_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3)); } // 5D array subset template Array<5,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4) { ADEPT_STATIC_ASSERT(rank == 5, SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4)); } template const Array<5,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4) const { ADEPT_STATIC_ASSERT(rank == 5, SUBSET_WITH_10_ARGS_ONLY_ON_RANK_5_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4)); } // 6D array subset template Array<6,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5) { ADEPT_STATIC_ASSERT(rank == 6, SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5)); } template const Array<6,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5) const { ADEPT_STATIC_ASSERT(rank 
== 6, SUBSET_WITH_12_ARGS_ONLY_ON_RANK_6_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5)); } // 7D array subset template Array<7,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5, const B6& ibegin6, const E6& iend6) { ADEPT_STATIC_ASSERT(rank == 7, SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5), range(ibegin6,iend6)); } template const Array<7,Type,IsActive> subset(const B0& ibegin0, const E0& iend0, const B1& ibegin1, const E1& iend1, const B2& ibegin2, const E2& iend2, const B3& ibegin3, const E3& iend3, const B4& ibegin4, const E4& iend4, const B5& ibegin5, const E5& iend5, const B6& ibegin6, const E6& iend6) const { ADEPT_STATIC_ASSERT(rank == 7, SUBSET_WITH_14_ARGS_ONLY_ON_RANK_7_ARRAY); return (*this)(range(ibegin0,iend0),range(ibegin1,iend1), range(ibegin2,iend2),range(ibegin3,iend3), range(ibegin4,iend4),range(ibegin5,iend5), range(ibegin6,iend6)); } // ------------------------------------------------------------------- // FixedArray: 5. 
Public member functions // ------------------------------------------------------------------- // STL-like size() returns total length of array Index size() const { return length_; } bool get_dimensions_(ExpressionSize& dims) const { dims[0] = J0; if (J1 > 0) { dims[1] = J1; if (J2 > 0) { dims[2] = J2; if (J3 > 0) { dims[3] = J3; if (J4 > 0) { dims[4] = J4; if (J5 > 0) { dims[5] = J5; if (J6 > 0) { dims[6] = J6; } } } } } } return true; } // Return constant reference to dimensions ExpressionSize dimensions() const { ExpressionSize dims; get_dimensions_(dims); return dims; } // Return individual dimension Index size(int j) const { if (j >= rank) { return 0; } else if (j == 0) { return J0; } else if (j == 1) { return J1; } else if (j == 2) { return J2; } else if (j == 3) { return J3; } else if (j == 4) { return J4; } else if (j == 5) { return J5; } else { return J6; } } Index dimension(int j) const { return size(j); } // Return individual offset Index offset(int j) const { if (j >= rank) { return 0; } else if (j == 0) { return offset_<0>::value; } else if (j == 1) { return offset_<1>::value; } else if (j == 2) { return offset_<2>::value; } else if (j == 3) { return offset_<3>::value; } else if (j == 4) { return offset_<4>::value; } else if (j == 5) { return offset_<5>::value; } else if (j == 6) { return offset_<6>::value; } else { throw invalid_dimension(); } } // Return constant reference to offsets ExpressionSize offset() const { ExpressionSize offs; offs[0] = offset_<0>::value; if (J1 > 0) { offs[1] = offset_<1>::value; if (J2 > 0) { offs[2] = offset_<2>::value; if (J3 > 0) { offs[3] = offset_<3>::value; if (J4 > 0) { offs[4] = offset_<4>::value; if (J5 > 0) { offs[5] = offset_<5>::value; if (J6 > 0) { offs[6] = offset_<6>::value; } } } } } } return offs; } const Index& last_offset() const { return offset_::value; } // Return true if the array is empty bool empty() const { return (J0 == 0); } // Return a string describing the array std::string info_string() const 
{ std::stringstream str; str << "FixedArray<" << rank << ">, dim=" << dimensions() << ", data_location=" << data_; if (IsActive) { str << ", gradient_index=" << gradient_index(); } return str.str(); } // Return a pointer to the start of the data Type* data() { return data_; } const Type* data() const { return data_; } const Type* const_data() const { return data_; } // Older style Type* data_pointer() { return data_; } const Type* data_pointer() const { return data_; } const Type* const_data_pointer() const { return data_; } // For vectors only, we allow a pointer to be returned to a // specified element Type* data_pointer(Index i) { ADEPT_STATIC_ASSERT(rank == 1, CAN_ONLY_USE_DATA_POINTER_WITH_INDEX_ON_VECTORS); if (data_) { return data_ + i; } else { return 0; } } const Type* const_data_pointer(Index i) const { ADEPT_STATIC_ASSERT(rank == 1, CAN_ONLY_USE_CONST_DATA_POINTER_WITH_INDEX_ON_VECTORS); if (data_) { return data_ + i; } else { return 0; } } bool is_aliased_(const Type* mem1, const Type* mem2) const { Type const * ptr_begin; Type const * ptr_end; data_range(ptr_begin, ptr_end); if (ptr_begin <= mem2 && ptr_end >= mem1) { return true; } else { return false; } } // By design, FixedArrays are row-major and row-wise access is // contiguous bool all_arrays_contiguous_() const { return true; } bool is_aligned_() const { return !(reinterpret_cast(data_) & Packet::align_mask); } template int alignment_offset_() const { return (reinterpret_cast(data_)/sizeof(Type)) % n; } Type value_with_len_(const Index& j, const Index& len) const { ADEPT_STATIC_ASSERT(rank == 1, CANNOT_USE_VALUE_WITH_LEN_ON_ARRAY_OF_RANK_OTHER_THAN_1); return data_[j]; } std::string expression_string_() const { if (true) { std::string a = internal::fixed_array_helper().name(); a += dimensions().str(); return a; } else { std::stringstream s; print(s); return s.str(); } } // The same as operator=(inactive scalar) but does not put // anything on the stack template typename 
internal::enable_if::value, FixedArray&>::type set_value(RType x) { if (!empty()) { assign_inactive_scalar_(x); } return *this; } // Return the gradient index for the first element in the array, // or -1 if not active Index gradient_index() const { return internal::GradientIndex::get(); } std::ostream& print(std::ostream& os) const { const Array x(*this); x.print(os); return os; } // Get pointers to the first and last data members in memory. void data_range(Type const * &data_begin, Type const * &data_end) const { data_begin = data_; data_end = data_ + length_-1; } // The Stack::independent(x) and Stack::dependent(y) functions add // the gradient_index of objects x and y to std::vector // objects in Stack. Since x and y may be scalars or arrays, this // is best done by delegating to the Active or FixedArray classes. template void push_gradient_indices(std::vector& vec) const { ADEPT_STATIC_ASSERT(IsActive, CANNOT_PUSH_GRADIENT_INDICES_FOR_INACTIVE_ARRAY); ExpressionSize i(0); Index gradient_ind = gradient_index(); Index index = 0; int my_rank; vec.reserve(vec.size() + size()); do { // Innermost loop - note that the counter is index, not max_index for (Index max_index = index + dimension_::value*offset_::value; index < max_index; index += offset_::value) { vec.push_back(gradient_ind + index); } // Increment counters appropriately depending on which // dimensions have been finished advance_index(index, my_rank, i); } while (my_rank >= 0); } // Return inactive array linked to original data Array inactive_link() { return Array(data_, 0, dimensions(), offset(), internal::GradientIndex::get()); } // Transpose helper functions protected: template typename internal::enable_if >::type my_T() { // Transpose 2D array: create output array initially as link // to input array Array<2,Type,IsActive> out(*this); // Swap dimensions return out.in_place_transpose(); } template typename internal::enable_if >::type my_T() const { // Transpose 2D array: create output array initially as 
link // to input array Array<2,Type,IsActive> out(const_cast(*this)); // Swap dimensions return out.in_place_transpose(); } public: // Out-of-place transpose Array<2,Type,IsActive> T() { ADEPT_STATIC_ASSERT(rank == 1 || rank == 2, TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS); return my_T(); } const Array<2,Type,IsActive> T() const { ADEPT_STATIC_ASSERT(rank == 1 || rank == 2, TRANSPOSE_ONLY_POSSIBLE_WITH_1D_OR_2D_ARRAYS); return my_T(); } // "permute" is a generalized transpose, returning an FixedArray linked // to the current one but with the dimensions rearranged according // to idim: idim[0] is the 0-based number of the dimension of the // current array that will be dimension 0 of the new array, // idim[1] is the number of the dimension of the current array // that will be dimension 1 of the new array and so on. Array permute(const Index* idim) { if (empty()) { throw empty_array("Attempt to permute an empty array" ADEPT_EXCEPTION_LOCATION); } ExpressionSize new_dims(0); ExpressionSize new_offset; ExpressionSize dims, offs; dims = dimensions(); offs = offset(); for (int i = 0; i < rank; ++i) { if (idim[i] >= 0 && idim[i] < rank) { new_dims[i] = dims[idim[i]]; new_offset[i] = offs[idim[i]]; } else { throw invalid_dimension("Dimensions must be in range 0 to rank-1 in permute" ADEPT_EXCEPTION_LOCATION); } } for (int i = 0; i < rank; ++i) { if (new_dims[i] == 0) { throw invalid_dimension("Missing dimension in permute" ADEPT_EXCEPTION_LOCATION); } } return Array(data_, 0, new_dims, new_offset, internal::GradientIndex::get()); } Array permute(const ExpressionSize& idim) { return permute(&idim[0]); } // Up to 7 dimensions we can specify the dimensions as separate // arguments typename internal::enable_if<(rank < 7), Array >::type permute(Index i0, Index i1, Index i2 = -1, Index i3 = -1, Index i4 = -1, Index i5 = -1, Index i6 = -1) { Index idim[7] = {i0, i1, i2, i3, i4, i5, i6}; for (int i = 0; i < rank; ++i) { if (idim[i] == -1) { throw invalid_dimension("Incorrect 
number of dimensions provided to permute" ADEPT_EXCEPTION_LOCATION); } } return permute(idim); } // Return an inactive array of the same type and rank as the // present active fixed array, containing the gradients associated // with it template void get_gradient(Array& gradient) const { ADEPT_STATIC_ASSERT(IsActive,CANNOT_USE_GET_GRADIENT_ON_INACTIVE_ARRAY); if (gradient.empty()) { gradient.resize(dimensions()); } else if (gradient.dimensions() != dimensions()) { throw size_mismatch("Attempt to get_gradient with array of different dimensions" ADEPT_EXCEPTION_LOCATION); } static const int last = rank-1; ExpressionSize target_offset = gradient.offset(); ExpressionSize i(0); Index index = 0; int my_rank; Index index_target = 0; Index last_dim_stretch = dimension_::value*offset_::value; MyType* target = gradient.data(); do { i[last] = 0; index_target = 0; for (int r = 0; r < rank-1; r++) { index_target += i[r]*target_offset[r]; } ADEPT_ACTIVE_STACK->get_gradients(gradient_index()+index, gradient_index()+index+last_dim_stretch, target+index_target, offset_::value, target_offset[last]); index += last_dim_stretch; advance_index(index, my_rank, i); } while (my_rank >= 0); } // Return an inactive array of the same type and rank as the // present active array containing the gradients associated with // it Array get_gradient() const { Array gradient; get_gradient(gradient); return gradient; } void put(std::vector::type>& data) const { ADEPT_STATIC_ASSERT(rank == 1, PUT_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS); if (data.size() != J0) { data.resize(J0); } for (Index i = 0; i < J0; ++i) { data[i] = (*this)(i); } } void get(const std::vector::type>& data) { ADEPT_STATIC_ASSERT(rank == 1, GET_ONLY_AVAILABLE_FOR_RANK_1_ARRAYS); if (data.size() != J0) { resize(data.size()); } for (Index i = 0; i < J0; ++i) { (*this)(i) = data[i]; } } // ------------------------------------------------------------------- // FixedArray: 6. 
Member functions accessed by the Expression class // ------------------------------------------------------------------- // The members below work on a memory index kept in slot MyArrayNum of a location vector: set_location_ stores index_(i) there, the value and packet accessors read data_ at that index, and advance_location_ adds offset_::value to it. The scratch arguments are accepted but not used by this class. template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { index[MyArrayNum] = index_(i); } template Type value_at_location_(const ExpressionSize& loc) const { return data_[loc[MyArrayNum]]; } template Packet packet_at_location_(const ExpressionSize& loc) const { return Packet(data_+loc[MyArrayNum]); } Type& lvalue_at_location(const Index& loc) { return data_[loc]; } template Type value_at_location_store_(const ExpressionSize& loc, internal::ScratchVector& scratch) const { return data_[loc[MyArrayNum]]; } template Type value_stored_(const ExpressionSize& loc, const internal::ScratchVector& scratch) const { return data_[loc[MyArrayNum]]; } template void advance_location_(ExpressionSize& loc) const { loc[MyArrayNum] += offset_::value; } // If an expression leads to calc_gradient being called on an // active object, we push the multiplier and the gradient index on // to the operation stack (or 1.0 if no multiplier is specified) template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch) const { stack.push_rhs(1.0, gradient_index() + loc[MyArrayNum]); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch, const MyType& multiplier) const { stack.push_rhs(multiplier, gradient_index() + loc[MyArrayNum]); } // ------------------------------------------------------------------- // FixedArray: 7. 
Protected member functions // ------------------------------------------------------------------- protected: // Return the memory index (relative to data_) for array element // indicated by j Index index_(Index j[rank]) const { Index o = 0; ExpressionSize offs = offset(); for (int i = 0; i < rank; i++) { o += j[i]*offs[i]; } return o; } Index index_(const ExpressionSize& j) const { Index o = 0; for (int i = 0; i < rank; i++) { o += j[i]*offset(i); } return o; } // Used in traversing through an array: called after an innermost // sweep, it first takes offset_::value*dimension_::value back off // index, then steps the counters in i from dimension rank-2 // downwards; a counter that reaches its dimension is reset to 0 and // index is rewound accordingly, otherwise index advances by that // dimension's offset and the walk stops. my_rank is left at -1 once // every outer counter has wrapped, which is what ends the callers' // do-while loops void advance_index(Index& index, int& my_rank, ExpressionSize& i) const { index -= offset_::value*dimension_::value; my_rank = rank-1; while (--my_rank >= 0) { if (++i[my_rank] >= dimension(my_rank)) { i[my_rank] = 0; index -= offset(my_rank)*(dimension(my_rank)-1); } else { index += offset(my_rank); break; } } } // When assigning a scalar to a whole array, there may be // advantage in specialist behaviour depending on the rank of the // array. This is a generic one that copies the number but treats // the present array as passive. template typename internal::enable_if::type assign_inactive_scalar_(X x) { ExpressionSize i(0); Index index = 0; int my_rank; do { // Innermost loop - note that the counter is index, not max_index for (Index max_index = index + dimension_::value*offset_::value; index < max_index; index += offset_::value) { data_[index] = x; } // Increment counters appropriately depending on which // dimensions have been finished advance_index(index, my_rank, i); } while (my_rank >= 0); } // An active array being assigned the value of an inactive scalar template typename internal::enable_if::type assign_inactive_scalar_(X x) { // If not recording we call the inactive version instead #ifdef ADEPT_RECORDING_PAUSABLE if (! 
ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar_(x); return; } #endif ExpressionSize i(0); Index gradient_ind = gradient_index(); Index index = 0; int my_rank; do { // Innermost loop ADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimension_::value, offset_::value); for (Index max_index = index + dimension_::value*offset_::value; index < max_index; index += offset_::value) { data_[index] = x; } // Increment counters appropriately depending on which // dimensions have been finished advance_index(index, my_rank, i); } while (my_rank >= 0); } // When copying an expression to a whole array, there may be // advantage in specialist behaviour depending on the rank of the // array template typename internal::enable_if::type assign_expression_(const E& rhs) { ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY); ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; static const int last = LocalRank-1; do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { data_[index] = rhs.next_value(ind); } advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_expression_(const E& rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; static const int last = LocalRank-1; ADEPT_ACTIVE_STACK->check_space(internal::expr_cast::n_active * size()); do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, ind); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active? 
} advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_expression_(const E& rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> ind(0); Index index = 0; int my_rank; Index gradient_ind = gradient_index(); static const int last = LocalRank-1; do { i[last] = 0; rhs.set_location(i, ind); // Innermost loop ADEPT_ACTIVE_STACK->push_lhs_range(gradient_ind+index, dimension_::value, offset_::value); for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { data_[index] = rhs.next_value(ind); } advance_index(index, my_rank, i); } while (my_rank >= 0); } // Assign the scalar rhs to those elements for which bool_expr // yields true; this first version only writes data_ and records // nothing template typename internal::enable_if::type assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) { ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); Index index = 0; int my_rank; static const int last = rank-1; do { i[last] = 0; bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { if (bool_expr.next_value(bool_ind)) { data_[index] = rhs; } } advance_index(index, my_rank, i); } while (my_rank >= 0); } // As above, but each element written is also passed to // ADEPT_ACTIVE_STACK->push_lhs template typename internal::enable_if::type assign_conditional_inactive_scalar_(const B& bool_expr, C rhs) { #ifdef ADEPT_RECORDING_PAUSABLE if (! 
ADEPT_ACTIVE_STACK->is_recording()) { assign_conditional_inactive_scalar_(bool_expr, rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); Index index = 0; int my_rank; static const int last = rank-1; do { i[last] = 0; bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { if (bool_expr.next_value(bool_ind)) { data_[index] = rhs; ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); } } advance_index(index, my_rank, i); } while (my_rank >= 0); } // Assign elements of the expression rhs where bool_expr yields // true. is_gap records that at least one element has been skipped // since rhs was last positioned, in which case rhs.set_location is // called again before the next value is read template typename internal::enable_if::type assign_conditional_(const B& bool_expr, const C& rhs) { ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); ExpressionSize::n_arrays> rhs_ind(0); Index index = 0; int my_rank; static const int last = rank-1; bool is_gap = false; do { i[last] = 0; rhs.set_location(i, rhs_ind); bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { if (bool_expr.next_value(bool_ind)) { if (is_gap) { rhs.set_location(i, rhs_ind); is_gap = false; } data_[index] = rhs.next_value(rhs_ind); } else { is_gap = true; } } advance_index(index, my_rank, i); } while (my_rank >= 0); } template typename internal::enable_if::type assign_conditional_(const B& bool_expr, const C& rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_conditional_(bool_expr, rhs); return; } #endif ExpressionSize i(0); ExpressionSize::n_arrays> bool_ind(0); ExpressionSize::n_arrays> rhs_ind(0); Index index = 0; int my_rank; static const int last = rank-1; bool is_gap = false; ADEPT_ACTIVE_STACK->check_space(internal::expr_cast::n_active * size()); do { i[last] = 0; rhs.set_location(i, rhs_ind); bool_expr.set_location(i, bool_ind); // Innermost loop for ( ; i[last] < dimension_::value; ++i[last], index += offset_::value) { if 
(bool_expr.next_value(bool_ind)) { if (is_gap) { rhs.set_location(i, rhs_ind); is_gap = false; } data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, rhs_ind); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); // What if RHS not active? } else { is_gap = true; } } advance_index(index, my_rank, i); } while (my_rank >= 0); } // ------------------------------------------------------------------- // FixedArray: 8. Data // ------------------------------------------------------------------- protected: Type data_[length_]; // Held directly inside the object as a member array of length_ elements }; // End of FixedArray class // ------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------- // Print array on a stream template inline std::ostream& operator<<(std::ostream& os, const FixedArray& A) { const Array::rank,Type,IsActive> B = A; // link to original data return B.print(os); } // Extract inactive part of array, working correctly depending on // whether argument is active or inactive template inline FixedArray& value(FixedArray& expr) { return expr; } template inline FixedArray value(FixedArray& expr) { return expr.inactive_link(); } // ------------------------------------------------------------------- // Transpose function // ------------------------------------------------------------------- // Transpose 2D array template inline Array<2,Type,IsActive> transpose(FixedArray& in) { // Create output array initially as link to input array Array<2,Type,IsActive> out(in); // Swap dimensions return out.in_place_transpose(); } // Extract the gradients from an active FixedArray after the // Stack::forward or Stack::reverse functions have been called template inline void get_gradients(const FixedArray& a, FixedArray& data) { data = a.get_gradient(); } template internal::Allocator::rank, FixedArray > operator<<(FixedArray& array, const E& x) { return internal::Allocator::rank, FixedArray >(array, x); } } // End 
// namespace adept

#endif

================================================
FILE: include/adept/GradientIndex.h
================================================
#ifndef AdeptGradientIndex_H
#define AdeptGradientIndex_H 1

#include

namespace adept {
  namespace internal {

    // Arrays inherit from this class to provide optional storage of
    // the gradient index of the first value of the array depending on
    // whether the array is active or not.  This version stores the
    // index; the value -9999 is used to mean "not set".
    template
    struct GradientIndex {
      // Constructor used when linking to existing data where gradient
      // index is known
      GradientIndex(Index val = -9999) : value_(val) { }
      // Constructor used for fixed array objects where length is
      // known: n gradients are registered with the active stack and
      // the index returned is stored.  The bool argument is unused
      // and only selects this overload.
      GradientIndex(Index n, bool) : value_(ADEPT_ACTIVE_STACK->register_gradients(n)) { }
      // Gradient index at a given offset from a known index
      GradientIndex(Index val, Index offset) : value_(val+offset) { }
      Index get() const { return value_; }
      void set(Index val) { value_ = val; }
      void clear() { value_ = -9999; }
      // Set the gradient index from the position of "data" relative
      // to the start of the data held by "storage"
      template
      void set(const Type* data, const Storage* storage) {
        value_ = (storage->gradient_index() + (data - storage->data()));
      }
      // Always throws, since this version is the one that stores a
      // gradient index
      void assert_inactive() {
        throw invalid_operation("Operation applied that is invalid with active arrays" ADEPT_EXCEPTION_LOCATION);
      }
      // Unregister n gradients starting at the stored index
      void unregister(Index n) { ADEPT_ACTIVE_STACK->unregister_gradients(value_, n); }
#ifdef ADEPT_MOVE_SEMANTICS
      // Exchange the stored index with that of rhs
      void swap_value(GradientIndex& rhs) noexcept {
        Index tmp_value = rhs.get();
        rhs.set(value_);
        value_ = tmp_value;
      }
#endif
    private:
      Index value_;
    };

    // Specialization that stores nothing: every function is a no-op
    // and get() always returns -9999
    template <>
    struct GradientIndex {
      GradientIndex(Index val = -9999) { }
      GradientIndex(Index, bool) { }
      GradientIndex(Index val, Index offset) { }
      Index get() const { return -9999; }
      void set(Index val) { }
      void clear() { }
      template
      void set(const Type* data, const Storage* storage) { }
      void assert_inactive() { }
      void unregister(Index) { }
#ifdef ADEPT_MOVE_SEMANTICS
      void swap_value(GradientIndex& rhs) noexcept { }
#endif
    };
  };
};

#endif

================================================
FILE: include/adept/IndexedArray.h
================================================ /* IndexedArray.h -- Support for indexed arrays Copyright (C) 2015-2018 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. If an Array is indexed via A(i,j,...) then there are three possible return values: (1) a scalar, if all indices are scalar integers (including 0-rank expressions such as "end"); (2) an Array that links to a subset of the data in the original Array, if one or more of the indices is a RangeIndex object and all the rest are scalar integers; and (3) an IndexedArray object, if one or more of the indices is a vector of integers. All of these return values can be used on the left-hand-side of an expression. This file treats the last case. The code is quite complex because the rank of the IndexedArray may be reduced compared to the original Array, since dimensions indexed by scalar integers are removed in IndexedArray. */ #ifndef AdeptIndexedArray_H #define AdeptIndexedArray_H 1 #include #include namespace adept { // --------------------------------------------------------------------- // Section 0: Forward declarations // --------------------------------------------------------------------- template class Array; namespace internal { // --------------------------------------------------------------------- // Section 1. get_size_with_len // --------------------------------------------------------------------- // Return the size of an index to an individual dimension, with // specializations for the different types of index. The second // argument passes in the length of the dimension being indexed; // that way if any of the indices are expressions containing // "end", this will be replaced by that dimension length minus 1. 
// A scalar integer and rank-0 expression have a size of unity inline Index get_size_with_len(const Index& j, const Index&) { return 1; } template inline typename enable_if::is_integer && E::rank == 0, Index>::type get_size_with_len(const Expression&, const Index& len) { return 1; } // Extract the length of an IntVector template inline typename enable_if::is_integer && E::rank == 1 && !is_range::value, Index>::type get_size_with_len(const Expression& e, const Index& len) { ExpressionSize<1> s; e.get_dimensions(s); return s[0]; } // Extract the length of a RangeIndex object, which might be // dependent on len if "end" is present template inline typename enable_if::is_integer && is_range::value, Index>::type get_size_with_len(const Expression& e, const Index& len) { return e.cast().size_with_len_(len); } // Allow std::vector to be used to index Arrays template inline typename enable_if::is_integer, Index>::type get_size_with_len(const std::vector& v, const Index&) { return v.size(); } // --------------------------------------------------------------------- // Section 2. get_value_with_len // --------------------------------------------------------------------- // Return the j'th value of index ind. 
#ifndef ADEPT_BOUNDS_CHECKING
    // Unchecked versions: the final argument (the length of the
    // dimension being indexed) is only used to resolve expressions
    // passed to value_with_len

    // For scalar indices there is only one value to return - j ought
    // to be zero but we don't check this
    inline Index get_value_with_len(const Index& ind, const Index& j,
                                    const Index&)
    { return ind; }
    // Integer expression of rank less than 2: delegate to the
    // expression's value_with_len
    template
    inline
    typename enable_if::is_integer
                       && (E::rank < 2), Index>::type
    get_value_with_len(const Expression& ind, const Index& j,
                       const Index& len) {
      return ind.value_with_len(j, len);
    }

    // std::vector index: plain element access
    template
    inline
    Index
    get_value_with_len(const std::vector& ind, const Index& j,
                       const Index&) {
      return ind[j];
    }
#else
    // Bounds-checked versions: each throws index_out_of_bounds if the
    // resulting index lies outside the range 0 to len-1

    // For scalar indices there is only one value to return, so here j
    // is required to be zero and an exception is thrown otherwise
    inline Index get_value_with_len(const Index& ind, const Index& j,
                                    const Index& len) {
      if (j != 0) {
        throw index_out_of_bounds("Index to IndexedArray is out of bounds"
                                  ADEPT_EXCEPTION_LOCATION);
      }
      else if (ind < 0 || ind >= len) {
        throw index_out_of_bounds("Scalar index out of bounds in IndexedArray"
                                  ADEPT_EXCEPTION_LOCATION);
      }
      else {
        return ind;
      }
    }
    // Integer expression of rank less than 2
    template
    inline
    typename enable_if::is_integer
                       && (E::rank < 2), Index>::type
    get_value_with_len(const Expression& ind, const Index& j,
                       const Index& len) {
      Index i = ind.value_with_len(j, len);
      if (i < 0 || i >= len) {
        throw index_out_of_bounds("Index out of bounds in IndexedArray"
                                  ADEPT_EXCEPTION_LOCATION);
      }
      else {
        return i;
      }
    }

    // std::vector index; note that only the value stored in the
    // vector is checked, not j itself
    template
    inline
    Index
    get_value_with_len(const std::vector& ind, const Index& j,
                       const Index& len) {
      Index i = ind[j];
      if (i < 0 || i >= len) {
        throw index_out_of_bounds("Index from std::vector out of bounds in IndexedArray"
                                  ADEPT_EXCEPTION_LOCATION);
      }
      else {
        return i;
      }
    }
#endif

    // ---------------------------------------------------------------------
    // Section 3. is_int_vector
    // ---------------------------------------------------------------------

    // is_int_vector::value is "true" if Type is a rank-1
    // integer expression (including RangeIndex objects), false
    // otherwise.
template struct is_int_vector { };

    // First specialization: value is always false
    template
    struct is_int_vector::value>::type>
    {
      static const bool value = false;
    };

    // Second specialization: value is true for an integer type whose
    // expression rank is 1
    template
    struct is_int_vector::value>::type>
    {
      static const bool value = std::numeric_limits::is_integer
        && expr_cast::rank == 1;
    };

    // is_index: true for either a regular index or an integer
    // vector; "count" is the same flag as an integer (0 or 1)
    template
    struct is_index {
      static const bool value = is_regular_index::value || is_int_vector::value;
      static const int  count = value;
    };

    // is_irregular_index: true for an integer vector that is not a
    // range, i.e. one whose elements could be irregularly spaced
    template
    struct is_irregular_index {
      static const bool value = !is_range::value && is_int_vector::value;
      static const int  count = value;
    };

    // ---------------------------------------------------------------------
    // Section 4. is_irreg_indexed
    // ---------------------------------------------------------------------

    // is_irreg_indexed::value is "true" if indices I0 to I[Rank-1]
    // contain at least one integer vector that could be irregularly
    // spaced, and all the others are valid indices.  The ::count
    // member is 7 minus the number of scalar-integer indices, i.e.
    // the number of non-scalar indices, which is the rank of the
    // IndexedArray objects resulting from indexing an Array of the
    // specified Rank with indices I0 to I[Rank-1].
    template
    struct is_irreg_indexed {
      static const bool value
      = (   is_irregular_index::value || is_irregular_index::value
         || is_irregular_index::value || is_irregular_index::value
         || is_irregular_index::value || is_irregular_index::value
         || is_irregular_index::value)
        && (   is_index::value && is_index::value
            && is_index::value && is_index::value
            && is_index::value && is_index::value
            && is_index::value);
      static const int count = 7 - (  is_scalar_int::count + is_scalar_int::count
                                    + is_scalar_int::count + is_scalar_int::count
                                    + is_scalar_int::count + is_scalar_int::count
                                    + is_scalar_int::count);
    };

    // ---------------------------------------------------------------------
    // Section 5.
// IndexedArray class
    // ---------------------------------------------------------------------

    // A class holding references to an Array to be indexed, plus
    // references to the objects corresponding to each of its
    // dimensions being indexed. IndexedArray objects are temporary,
    // generated by indexing an Array object "A" via A(i,j,...) within
    // an expression.  The indices themselves may be temporary results
    // of integer expressions, but by C++ rules they will not be
    // deleted until the full expression is complete.
    template
    class IndexedArray
      : public Expression > {

    public:
      // ---------------------------------------------------------------------
      // Section 5.1. IndexedArray: Static definitions
      // ---------------------------------------------------------------------
      static const int  rank       = Rank;
      // One scratch element: value_at_location_store_ saves the
      // element value there and value_stored_ reads it back
      static const int  n_scratch  = 1;
      static const int  n_active   = IsActive;
      // We require three indices to be stored to optimize the
      // calculation of the location: first the location of the start
      // of the row, second the index to i[Rank-1] (0, 1, 2...), and
      // third the location passed to the Array
      static const int  n_arrays   = 3;
      static const bool is_active  = IsActive;
      // The rank of the array being indexed may be higher than the
      // result of the index due to singleton indices
      // (e.g. Matrix(IntVector,int) has rank 1 even though Matrix has
      // rank 2).
      static const int  a_rank     = ArrayType::rank;

      // ---------------------------------------------------------------------
      // Section 5.2.
// IndexedArray: Constructors
      // ---------------------------------------------------------------------

      // Make default constructor that the compiler might generate
      // itself unreachable
    private:
      IndexedArray() { }

    public:
      // The constructor sets all unused indices to an integer of zero.
      // Note that the array and every index are held by reference
      // (see the data members), so they must outlive this object.
      IndexedArray(ArrayType& a, const I0& i0, const I1& i1 = 0,
                   const I2& i2 = 0, const I3& i3 = 0, const I4& i4 = 0,
                   const I5& i5 = 0, const I6& i6 = 0)
        : a_(a), i0_(i0), i1_(i1), i2_(i2), i3_(i3), i4_(i4), i5_(i5), i6_(i6),
          a_dims_(a.dimensions())
      {
        // Compute the dimensions of the IndexedArray objects from the
        // lengths of the non-singleton indices to Array
        set_dimensions_<0,0>();
        // For stepping through memory efficiently in the inner loop,
        // we store the distance between elements in the fastest
        // varying dimension in Array
        last_offset_ = a.offset()[a_fastest_varying_dim];
      }

      // ---------------------------------------------------------------------
      // Section 5.3. IndexedArray: Functions facilitating Expression functionality
      // ---------------------------------------------------------------------

      // Report the dimensions of the indexed result (not those of the
      // underlying array); always succeeds
      bool get_dimensions_(ExpressionSize& dim) const {
        dim = dimensions_;
        return true;
      }

      // Human-readable description: the expression string followed by
      // the dimensions of the underlying array, the dimensions of
      // this object and the stored offset
      std::string info_string() const {
        std::stringstream s;
        s << expression_string_() << ", array-dim=" << a_dims_
          << ", dim=" << dimensions_ << ", last-offset_=" << last_offset_;
        return s.str();
      }

      // String of the form "A(i0,i1,...)" listing one index for each
      // dimension of the underlying array
      std::string expression_string_() const {
        std::string str;
        str = a_.expression_string() + "(";
        str += expr_string(i0_);
        if (a_rank > 1) {
          str += std::string(",") + expr_string(i1_);
          if (a_rank > 2) {
            str += std::string(",") + expr_string(i2_);
            if (a_rank > 3) {
              str += std::string(",") + expr_string(i3_);
              if (a_rank > 4) {
                str += std::string(",") + expr_string(i4_);
                if (a_rank > 5) {
                  str += std::string(",") + expr_string(i5_);
                  if (a_rank > 6) {
                    str += std::string(",") + expr_string(i6_);
                  }
                }
              }
            }
          }
        }
        str += ")";
        return str;
      }

    protected:
      // Helper functions for expression_string(): an Expression
      // provides its own string...
      template
      std::string expr_string(const Expression& e) const {
        return e.expression_string();
      }
      // ...while any other index type is written via operator<<
      template
      typename enable_if::value, std::string>::type
      expr_string(const T& e) const {
        std::stringstream s;
        s << e;
        return s.str();
      }

    public:
      // Aliasing is decided entirely by the array being indexed
      bool is_aliased_(const Type* mem1, const Type* mem2) const {
        return a_.is_aliased(mem1, mem2);
      }

      Type value_with_len_(const Index& i, const Index& len) const {
        // Treat as one dimensional
        return a_(get_value_with_len_(i));
      }

      // Set the three entries of "loc" belonging to this object (see
      // n_arrays) from the coordinates of the indexed result
      template
      void set_location_(const ExpressionSize& coords,
                         ExpressionSize& loc) const {
        ExpressionSize a_coords;
        translate_coords_<0,0>(coords, a_coords);
        // Location of start of most rapidly varying dimension in
        // Array
        a_.template set_location_(a_coords, loc);
        // Index to most rapidly varying dimension in IndexedArray
        loc[MyArrayNum+1] = coords[Rank-1];
        // Location of the indexed element within the Array
        loc[MyArrayNum+2] = loc[MyArrayNum]
          + last_offset_ * get_value_with_len_(loc[MyArrayNum+1]);
      }

      // Advance the location of each array in the expression
      template
      void advance_location_(ExpressionSize& loc) const {
        ++loc[MyArrayNum+1];
        // Note that next_value calls advance_location even when it
        // has reached the end of a row, in which case finding the
        // location of an indexed array is an invalid operation since
        // it would require accessing the indexing array out of
        // bounds. Hence the "if" test here.
        if (loc[MyArrayNum+1] < dimensions_[Rank-1]) {
          loc[MyArrayNum+2] = loc[MyArrayNum]
            + last_offset_ * get_value_with_len_(loc[MyArrayNum+1]);
        }
      }

      // Value of the currently addressed element, obtained from the
      // underlying array
      template
      Type value_at_location_(const ExpressionSize& loc) const {
        return a_.template value_at_location_(loc);
      }

      // As value_at_location_ but also saving the value in the
      // scratch vector; relies on the underlying array using no
      // scratch space itself
      template
      Type value_at_location_store_(const ExpressionSize& loc,
                                    ScratchVector& scratch) const {
        ADEPT_STATIC_ASSERT(ArrayType::n_scratch == 0, ASSUMING_ARRAY_N_SCRATCH_IS_ZERO);
        return (scratch[MyScratchNum] = a_.template value_at_location_(loc));
      }

      // Return the value previously saved by value_at_location_store_
      template
      Type value_stored_(const ExpressionSize& loc,
                         const ScratchVector& scratch) const {
        return scratch[MyScratchNum];
      }

      // Gradient calculations are forwarded unchanged to the
      // underlying array
      template
      void calc_gradient_(Stack& stack,
                          const ExpressionSize& loc,
                          const ScratchVector& scratch) const {
        a_.template calc_gradient_(stack, loc, scratch);
      }

      template
      void calc_gradient_(Stack& stack,
                          const ExpressionSize& loc,
                          const ScratchVector& scratch,
                          MyType multiplier) const {
        a_.template calc_gradient_(stack, loc, scratch, multiplier);
      }

      // ---------------------------------------------------------------------
      // Section 5.4.
IndexedArray: Operators // --------------------------------------------------------------------- // Operators so that IndexedArray can appear on the // left-hand-side of a statement IndexedArray& operator=(const IndexedArray& src) { *this = static_cast&>(src); return *this; } // Assignment to a single value copies to every element template typename enable_if::value, IndexedArray&>::type operator=(RType rhs) { if (!empty()) { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif assign_inactive_scalar_(rhs); #ifdef ADEPT_RECORDING_PAUSABLE } else { assign_inactive_scalar_(rhs); } #endif } return *this; } public: // Assignment to an array expression of the same rank template typename enable_if::type operator=(const Expression& rhs) { // Definition moved to Array.h due to its dependence on the // Array class ExpressionSize dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (!compatible(dims, dimensions_)) { std::string str = "Expr"; str += dims.str() + " object assigned to " + expression_string_(); throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } if (!empty()) { #ifndef ADEPT_NO_ALIAS_CHECKING // Check for aliasing first Type const * ptr_begin; Type const * ptr_end; a_.data_range(ptr_begin, ptr_end); if (rhs.is_aliased(ptr_begin, ptr_end)) { Array copy; copy = noalias(rhs); assign_expression_(copy); } else { #endif assign_expression_(rhs); #ifndef ADEPT_NO_ALIAS_CHECKING } #endif } return *this; } // Assign active scalar expression to an active array by first // converting the RHS to an active scalar template typename enable_if 0) && IsActive && !E::is_lvalue, IndexedArray&>::type operator=(const Expression& rhs) { Active x = rhs; *this = x; return *this; } // Assign an active scalar to an active array template typename enable_if::value && IsActive, IndexedArray&>::type operator=(const Active& rhs) { 
ADEPT_STATIC_ASSERT(IsActive, ATTEMPT_TO_ASSIGN_ACTIVE_SCALAR_TO_INACTIVE_INDEXED_ARRAY); if (!empty()) { #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar_(rhs.scalar_value()); return *this; } #endif ExpressionSize coords(0); ExpressionSize a_coords(0); ExpressionSize<1> a_loc(0); Type val = rhs.scalar_value(); int dim; static const int last = Rank-1; do { coords[last] = 0; // Convert between the coordinates of the IndexedArray // object to the coordinates of the Array object translate_coords_<0,0>(coords, a_coords); a_.set_location(a_coords, a_loc); // Innermost loop for ( ; coords[last] < dimensions_[last]; ++coords[last]) { Index index = a_loc[0] + last_offset_ * get_value_with_len_(coords[last]); a_.data()[index] = val; ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index()); ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index); } advance_index(dim, coords); } while (dim >= 0); } return *this; } #define ADEPT_DEFINE_OPERATOR(OPERATOR, OPSYMBOL) \ template \ IndexedArray& OPERATOR(const RType& rhs) { \ return *this = noalias(*this) OPSYMBOL rhs; \ } ADEPT_DEFINE_OPERATOR(operator+=, +) ADEPT_DEFINE_OPERATOR(operator-=, -) ADEPT_DEFINE_OPERATOR(operator*=, *) ADEPT_DEFINE_OPERATOR(operator/=, /) // ADEPT_DEFINE_OPERATOR(operator&=, &); // ADEPT_DEFINE_OPERATOR(operator|=, |); #undef ADEPT_DEFINE_OPERATOR #ifdef ADEPT_CXX11_FEATURES // To enable assignment to an initializer list we take a simple // but inefficient strategy of creating a temporary Array and // assigning to that template IndexedArray& operator=(std::initializer_list list) { ADEPT_STATIC_ASSERT(Rank==1,RANK_MISMATCH_IN_INITIALIZER_LIST); Array array = list; return (*this = array); } template IndexedArray& operator=(std::initializer_list< std::initializer_list > list) { ADEPT_STATIC_ASSERT(Rank==2,RANK_MISMATCH_IN_INITIALIZER_LIST); Array array = list; return (*this = array); } template IndexedArray& operator=(std::initializer_list< 
std::initializer_list< std::initializer_list > > list) { ADEPT_STATIC_ASSERT(Rank==3,RANK_MISMATCH_IN_INITIALIZER_LIST); Array array = list; return (*this = array); } template IndexedArray& operator=(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > list) { ADEPT_STATIC_ASSERT(Rank==4,RANK_MISMATCH_IN_INITIALIZER_LIST); Array array = list; return (*this = array); } template IndexedArray& operator=(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > > list) { ADEPT_STATIC_ASSERT(Rank==5,RANK_MISMATCH_IN_INITIALIZER_LIST); Array array = list; return (*this = array); } template IndexedArray& operator=(std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list< std::initializer_list > > > > > list) { ADEPT_STATIC_ASSERT(Rank==6,RANK_MISMATCH_IN_INITIALIZER_LIST); Array array = list; return (*this = array); } #endif protected: // --------------------------------------------------------------------- // Section 5.5. 
IndexedArray: Internal functions facilitating operator= // --------------------------------------------------------------------- // Two versions of assigning an inactive scalar to an indexed // array depending on whether the indexed array is active - // first the case when it is not template typename enable_if::type assign_inactive_scalar_(X x) { ExpressionSize coords(0); ExpressionSize a_coords(0); ExpressionSize<1> a_loc(0); int dim; static const int last = Rank-1; do { coords[last] = 0; // Convert between the coordinates of the IndexedArray // object to the coordinates of the Array object translate_coords_<0,0>(coords, a_coords); a_.set_location(a_coords, a_loc); // Innermost loop for ( ; coords[last] < dimensions_[last]; ++coords[last]) { a_.data()[a_loc[0] + last_offset_ * get_value_with_len_(coords[last])] = x; } advance_index(dim, coords); } while (dim >= 0); } // Active version of assigning an inactive scalar template typename enable_if::type assign_inactive_scalar_(X x) { // If not recording we call the inactive version instead #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar_(x); return; } #endif ExpressionSize coords(0); ExpressionSize a_coords(0); ExpressionSize<1> a_loc(0); int dim; static const int last = Rank-1; do { coords[last] = 0; // Convert between the coordinates of the IndexedArray // object to the coordinates of the Array object translate_coords_<0,0>(coords, a_coords); a_.set_location(a_coords, a_loc); // Innermost loop for ( ; coords[last] < dimensions_[last]; ++coords[last]) { Index index = a_loc[0] + last_offset_ * get_value_with_len_(coords[last]); a_.data()[index] = x; ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index); } advance_index(dim, coords); } while (dim >= 0); } // Assign expression has two versions, passive and active template typename enable_if::type assign_expression_(const E& rhs) { ADEPT_STATIC_ASSERT(!RightIsActive, 
CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_INDEXED_ARRAY); ExpressionSize coords(0); ExpressionSize a_coords(0); ExpressionSize::n_arrays> loc(0); ExpressionSize<1> a_loc(0); int dim; static const int last = Rank-1; do { coords[last] = 0; rhs.set_location(coords, loc); // Convert between the coordinates of the IndexedArray // object to the coordinates of the Array object translate_coords_<0,0>(coords, a_coords); a_.set_location(a_coords, a_loc); // Innermost loop for ( ; coords[last] < dimensions_[last]; ++coords[last]) { a_.data()[a_loc[0] + last_offset_ * get_value_with_len_(coords[last])] = rhs.next_value(loc); } advance_index(dim, coords); } while (dim >= 0); } // Active LHS, passive RHS template typename enable_if::type assign_expression_(const E& rhs) { #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize coords(0); ExpressionSize a_coords(0); ExpressionSize::n_arrays> loc(0); ExpressionSize<1> a_loc(0); int dim; static const int last = Rank-1; do { coords[last] = 0; rhs.set_location(coords, loc); // Convert between the coordinates of the IndexedArray // object to the coordinates of the Array object translate_coords_<0,0>(coords, a_coords); a_.set_location(a_coords, a_loc); // Innermost loop for ( ; coords[last] < dimensions_[last]; ++coords[last]) { Index index = a_loc[0] + last_offset_ * get_value_with_len_(coords[last]); a_.data()[index] = rhs.next_value(loc); ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index); } advance_index(dim, coords); } while (dim >= 0); } // Active LHS, active RHS template typename enable_if::type assign_expression_(const E& rhs) { #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize coords(0); ExpressionSize a_coords(0); ExpressionSize::n_arrays> loc(0); ExpressionSize<1> a_loc(0); int dim; static const int last = Rank-1; 
ADEPT_ACTIVE_STACK->check_space(expr_cast::n_active * dimensions_[0]); do { coords[last] = 0; rhs.set_location(coords, loc); // Convert between the coordinates of the IndexedArray // object to the coordinates of the Array object translate_coords_<0,0>(coords, a_coords); a_.set_location(a_coords, a_loc); // Innermost loop for ( ; coords[last] < dimensions_[last]; ++coords[last]) { Index index = a_loc[0] + last_offset_ * get_value_with_len_(coords[last]); a_.data()[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc); ADEPT_ACTIVE_STACK->push_lhs(a_.gradient_index()+index); } advance_index(dim, coords); } while (dim >= 0); } // Move to the start of the next row void advance_index(int& dim, ExpressionSize& coords) const { dim = Rank-1; while (--dim >= 0) { if (++coords[dim] >= dimensions_[dim]) { coords[dim] = 0; } else { break; } } } bool empty() { return dimensions_[0] == 0; } // Declare I as it is used before it is defined template struct Ix; // Translate coordinates in terms of the IndexedArray object in // to coordinates to the Array object it wraps, accounting for // singleton dimensions in Array that are not included in the // dimensions that IndexedArray presents to external objects template typename enable_if::type>::value && (InDim < Rank-1), void>::type translate_coords_(const ExpressionSize& in, ExpressionSize& out) const { // Compute the index of the OutDim dimension of Array out[OutDim] = get_value_with_len(index_object_(), in[InDim],a_dims_[OutDim]); // Move on to the next dimension translate_coords_(in, out); } template typename enable_if<(OutDim < a_rank) && is_scalar_int::type>::value, void>::type translate_coords_(const ExpressionSize& in, ExpressionSize& out) const { // This is a singleton dimension so the 0th element is the // only element out[OutDim] = get_value_with_len(index_object_(), 0,a_dims_[OutDim]); // Move on to the next OutDim dimension of Array translate_coords_(in, out); } template typename enable_if::type>::value && InDim 
== Rank-1, void>::type translate_coords_(const ExpressionSize& in, ExpressionSize& out) const { // The final non-singleton dimension is set to zero, since it // will be incremented later by advance_location out[OutDim] = 0; // Do any further dimensions, which must be singletons translate_coords_(in, out); } // Run out of dimensions: do nothing template typename enable_if::type translate_coords_(const ExpressionSize& in, ExpressionSize& out) const { } template Index get_value_with_len_(const Index& j) const { return get_value_with_len(index_object_(), j, a_dims_[Dim]); //return get_value_with_len(index_object_(), j, dimensions_[Dim]); } // --------------------------------------------------------------------- // Section 5.6. IndexedArray: Helper functions for the constructor // --------------------------------------------------------------------- // Helper function for translating between the dimensions of the // Array object and that of the IndexedArray, the latter of // which has removed the singleton dimensions of the former template typename enable_if<(OutDim < a_rank) && !is_scalar_int::type>::value,void>::type set_dimensions_() { dimensions_[InDim] = get_size_with_len(index_object_(), a_dims_[OutDim]); set_dimensions_(); } template typename enable_if<(OutDim < a_rank) && is_scalar_int::type>::value,void>::type set_dimensions_() { set_dimensions_(); } template typename enable_if::type set_dimensions_() { } // --------------------------------------------------------------------- // Section 5.7. IndexedArray: Low-level helper sub-classes and functions // --------------------------------------------------------------------- // The individual indices are stored in objects of type I0 to // I[Rank-1]. The following sub-class "index_alias" enables the // definition of the sub-class I that is used such that // Ix::type returns the type of index "Dim" at compile time. 
template
      struct index_alias { };
      // One specialization per index position, 0 to 6, each selecting
      // the type at that position in the list
      template
      struct index_alias<0,X0,X1,X2,X3,X4,X5,X6> { typedef X0 type; };
      template
      struct index_alias<1,X0,X1,X2,X3,X4,X5,X6> { typedef X1 type; };
      template
      struct index_alias<2,X0,X1,X2,X3,X4,X5,X6> { typedef X2 type; };
      template
      struct index_alias<3,X0,X1,X2,X3,X4,X5,X6> { typedef X3 type; };
      template
      struct index_alias<4,X0,X1,X2,X3,X4,X5,X6> { typedef X4 type; };
      template
      struct index_alias<5,X0,X1,X2,X3,X4,X5,X6> { typedef X5 type; };
      template
      struct index_alias<6,X0,X1,X2,X3,X4,X5,X6> { typedef X6 type; };
      template
      struct Ix {
        typedef typename index_alias::type type;
      };

      // Similarly, the following enables us to return not just the
      // type but also a reference to the actual index object via
      // index_object_(); exactly one overload is enabled for each
      // index position
      template
      typename enable_if::type
      index_object_() const { return i0_; }
      template
      typename enable_if::type
      index_object_() const { return i1_; }
      template
      typename enable_if::type
      index_object_() const { return i2_; }
      template
      typename enable_if::type
      index_object_() const { return i3_; }
      template
      typename enable_if::type
      index_object_() const { return i4_; }
      template
      typename enable_if::type
      index_object_() const { return i5_; }
      template
      typename enable_if::type
      index_object_() const { return i6_; }

      // The following sub-class "fastest_varying" enables the
      // definition of "a_fastest_varying_dim" static constant integer
      // that contains the dimension of Array that varies fastest when
      // progressing through memory and is not a singleton.  This
      // corresponds to the dimension "Rank-1" of IndexedArray.  The
      // search starts at the last index (6) and steps past each index
      // that is a scalar integer.
      template
      struct fastest_varying {
        static const int value = is_scalar_int::type>::value
          ? fastest_varying::value : Dim;
      };
      // Terminates the search at dimension 0
      template
      struct fastest_varying<0,X0,X1,X2,X3,X4,X5,X6> {
        static const int value = 0;
      };

      static const int a_fastest_varying_dim
        = fastest_varying<6,I0,I1,I2,I3,I4,I5,I6>::value;

      // ---------------------------------------------------------------------
      // Section 5.8.
// IndexedArray: Data
      // ---------------------------------------------------------------------

      // Reference to the array being indexed
      ArrayType& a_;
      // Individual indices to up to seven dimensions; these are
      // references, so the index objects must outlive this object
      const I0& i0_;
      const I1& i1_;
      const I2& i2_;
      const I3& i3_;
      const I4& i4_;
      const I5& i5_;
      const I6& i6_;
      // Dimensions of the array being indexed (cannot be a reference
      // because FixedArrays do not store their dimensions explicitly)
      ExpressionSize a_dims_;
      // Dimensions of the IndexedArray
      ExpressionSize dimensions_;
      // Separation of elements of the array objects in the dimension
      // that varies fastest
      Index last_offset_;
    }; // End class IndexedArray

  } // End namespace internal

} // End namespace adept

#endif

================================================
FILE: include/adept/Minimizer.h
================================================
/* Minimizer.h -- class for minimizing the cost function of an optimizable object

    Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.
*/

#ifndef AdeptMinimizer_H
#define AdeptMinimizer_H 1

#include

namespace adept {

  // Minimization algorithms that may be selected.  The final entry
  // is not an algorithm: it takes the value following the last real
  // algorithm, i.e. the number of algorithms listed.
  enum MinimizerAlgorithm {
    MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS = 0,
    MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT, // Polak-Ribiere
    MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR, // Fletcher-Reeves
    MINIMIZER_ALGORITHM_LEVENBERG,
    MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT,
    MINIMIZER_ALGORITHM_NUMBER_AVAILABLE
  };

  // Status codes describing the outcome of a minimization.  Note
  // that MINIMIZER_STATUS_NOT_YET_CONVERGED is listed after
  // MINIMIZER_STATUS_NUMBER_AVAILABLE and so is not included in that
  // count.
  enum MinimizerStatus {
    MINIMIZER_STATUS_SUCCESS = 0,
    MINIMIZER_STATUS_EMPTY_STATE,
    MINIMIZER_STATUS_MAX_ITERATIONS_REACHED,
    MINIMIZER_STATUS_FAILED_TO_CONVERGE,
    MINIMIZER_STATUS_DIRECTION_UPHILL,
    MINIMIZER_STATUS_BOUND_REACHED, // Only returned from line-search
    MINIMIZER_STATUS_INVALID_COST_FUNCTION,
    MINIMIZER_STATUS_INVALID_GRADIENT,
    MINIMIZER_STATUS_INVALID_BOUNDS,
    MINIMIZER_STATUS_NUMBER_AVAILABLE,
    MINIMIZER_STATUS_NOT_YET_CONVERGED
  };

  // Return a C string describing the minimizer status
  const char* minimizer_status_string(MinimizerStatus status);

  // Return the order of a minimization algorithm: 0 indicates only
  // the cost function is required, 1 indicates the first derivative
  // is required, 2 indicates the second derivative is required, while
  // -1 indicates that the algorithm is not recognized.
inline int minimizer_algorithm_order(MinimizerAlgorithm algo) { switch (algo) { case MINIMIZER_ALGORITHM_LIMITED_MEMORY_BFGS: case MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT: case MINIMIZER_ALGORITHM_CONJUGATE_GRADIENT_FR: return 1; break; case MINIMIZER_ALGORITHM_LEVENBERG: case MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT: return 2; break; default: return -1; } } // Convenience function for initializing vectors representing the // lower and upper bounds on state variables inline void minimizer_initialize_bounds(int nx, adept::Vector& x_lower, adept::Vector& x_upper) { x_lower.resize(nx); x_upper.resize(nx); x_lower = -std::numeric_limits::max(); x_upper = std::numeric_limits::max(); } // A class that can minimize a function using various algorithms class Minimizer { public: // Tedious C++98 initializations Minimizer(MinimizerAlgorithm algo) { initialize_default_settings(); set_algorithm(algo); } Minimizer(const std::string& algo) { initialize_default_settings(); set_algorithm(algo); } void initialize_default_settings() { max_iterations_ = 100; // <=0 means no limit max_step_size_ = -1.0; converged_gradient_norm_ = 0.1; ensure_updated_state_ = -1; levenberg_damping_min_ = 1.0/128.0; levenberg_damping_max_ = 100000.0; levenberg_damping_multiplier_ = 2.0; levenberg_damping_divider_ = 5.0; levenberg_damping_start_ = 0.0; levenberg_damping_restart_ = 1.0/4.0; max_line_search_iterations_ = 10; armijo_coeff_ = 1.0e-4; cg_curvature_coeff_ = 0.1; lbfgs_curvature_coeff_ = 0.9; lbfgs_n_states_ = 6; } // Unconstrained minimization MinimizerStatus minimize(Optimizable& optimizable, Vector x); // Constrained minimization MinimizerStatus minimize(Optimizable& optimizable, Vector x, const Vector& x_lower, const Vector& x_upper); // Functions to set parameters defining the general behaviour of // minimization algorithms void set_algorithm(MinimizerAlgorithm algo) { algorithm_ = algo; } void set_algorithm(const std::string& algo); void set_max_iterations(int mi) { max_iterations_ = mi; } 
void set_converged_gradient_norm(Real cgn) { converged_gradient_norm_ = cgn; } void set_max_step_size(Real mss) { max_step_size_ = mss; } // Ensure that the last call to compute the cost function uses the // "solution" state vector returned by minimize. This ensures that // any variables in user classes that inherit from Optimizable are // up to date with the returned state vector. The "order" argument // indicates which the order of derivatives required (provided // they are supported by the minimizing algorithm): // 0=cost_function, 1=cost_function_gradient, // 2=cost_function_gradient_hessian. void ensure_updated_state(int order = 2) { ensure_updated_state_ = order; } // Return parameters defining behaviour of minimization algorithms MinimizerAlgorithm algorithm() { return algorithm_; } std::string algorithm_name(); int max_iterations() { return max_iterations_; } Real converged_gradient_norm() { return converged_gradient_norm_; } // Functions to set parameters defining the behaviour of the // Levenberg and Levenberg-Marquardt algorithm void set_levenberg_damping_limits(Real damp_min, Real damp_max); void set_levenberg_damping_start(Real damp_start); void set_levenberg_damping_restart(Real damp_restart); void set_levenberg_damping_multiplier(Real damp_multiply, Real damp_divide); // Functions to set parameters used by the L-BFGS and // Conjugate-Gradient algorithms void set_max_line_search_iterations(int mi) { max_line_search_iterations_ = mi; } void set_armijo_coeff(Real ac) { if (ac <= 0.0 || ac >= 1.0) { throw optimization_exception("Armijo coefficient must be greater than 0 and less than 1"); } else { armijo_coeff_ = ac; } } void set_lbfgs_curvature_coeff(Real lcc) { if (lcc <= 0.0 || lcc >= 1.0) { throw optimization_exception("L-BFGS curvature coefficient must be greater than 0 and less than 1"); } else { lbfgs_curvature_coeff_ = lcc; } } void set_cg_curvature_coeff(Real cgcc) { if (cgcc <= 0.0 || cgcc >= 1.0) { throw 
optimization_exception("Conjugate-Gradient curvature coefficient must be greater than 0 and less than 1"); } else { cg_curvature_coeff_ = cgcc; } } // Query aspects of the algorithm progress after it has completed int n_iterations() const { return n_iterations_; } int n_samples() const { return n_samples_; } Real cost_function() const { return cost_function_; } Real gradient_norm() const { return gradient_norm_; } Real start_cost_function() const { return start_cost_function_; } MinimizerStatus status() const { return status_; } protected: // Specific minimization algorithms // The Limited-Memory Broyden-Fletcher-Goldfarb-Shanno algorithm MinimizerStatus minimize_limited_memory_bfgs(Optimizable& optimizable, Vector x); MinimizerStatus minimize_limited_memory_bfgs_bounded(Optimizable& optimizable, Vector x, const Vector& min_x, const Vector& max_x); // The Conjugate-Gradient algorithm; Polak-Ribiere by default, // optionally Fletcher-Reeves MinimizerStatus minimize_conjugate_gradient(Optimizable& optimizable, Vector x, bool use_fletcher_reeves = false); MinimizerStatus minimize_conjugate_gradient_bounded(Optimizable& optimizable, Vector x, const Vector& min_x, const Vector& max_x, bool use_fletcher_reeves = false); // The Levenberg-Marquardt algorithm; if use_additive_damping is // true then the Levenberg algorithm is used instead MinimizerStatus minimize_levenberg_marquardt(Optimizable& optimizable, Vector x, bool use_additive_damping = false); MinimizerStatus minimize_levenberg_marquardt_bounded(Optimizable& optimizable, Vector x, const Vector& min_x, const Vector& max_x, bool use_additive_damping = false); // Perform line search starting at state vector "x" with gradient // vector "gradient", and initial step "step_size" in // un-normalized direction "direction". 
Successful minimization of // the function (according to Wolfe conditions) will lead to // MINIMIZER_STATUS_SUCCESS being returned, the new state stored // in "x", and if state_up_to_date >= 1 then the gradient stored // in "gradient". Other possible return values are // MINIMIZER_STATUS_FAILED_TO_CONVERGE and // MINIMIZER_STATUS_DIRECTION_UPHILL if the initial direction // points uphill, or MINIMIZER_STATUS_INVALID_COST_FUNCTION, // MINIMIZER_STATUS_INVALID_GRADIENT or // MINIMIZER_STATUS_BOUND_REACHED. First the minimum is bracketed, // then a cubic polynomial is fitted to the values and gradients // of the function at the two points in order to select the next // test point. MinimizerStatus line_search(Optimizable& optimizable, Vector x, const Vector& direction, Vector test_x, Real& abs_step_size, Vector gradient, int& state_up_to_date, Real curvature_coeff, Real bound_step_size = -1.0); // Compute the cost function "cf" and gradient vector "gradient", // along with the scalar gradient "grad" in the search direction // "direction" (normalized with "dir_scaling"), from the state // vector "x" plus a step "step_size" in the search direction. If // the resulting cost function and gradient satisfy the Wolfe // conditions for sufficient convergence, copy the new state // vector to "x" and the step size to "final_step_size", and // return MINIMIZER_STATUS_SUCCESS. Otherwise, return // MINIMIZER_STATUS_NOT_YET_CONVERGED. Error conditions // MINIMIZER_STATUS_INVALID_COST_FUNCTION and // MINIMIZER_STATUS_INVALID_GRADIENT are also possible. 
MinimizerStatus line_search_gradient_check(Optimizable& optimizable, Vector x, const Vector& direction, Vector test_x, Real& final_step_size, Vector gradient, int& state_up_to_date, Real step_size, Real grad0, Real dir_scaling, Real& cost_function, Real& grad, Real curvature_coeff); // DATA // Minimizer type MinimizerAlgorithm algorithm_; // Variables controling the general behaviour of the minimizer, // used by all gradient-based algorithms int max_iterations_; // <=0 means no limit Real max_step_size_; Real converged_gradient_norm_; int ensure_updated_state_; // Variables controling the specific behaviour of the // Levenberg-Marquardt minimizer Real levenberg_damping_min_; Real levenberg_damping_max_; Real levenberg_damping_multiplier_; Real levenberg_damping_divider_; Real levenberg_damping_start_; Real levenberg_damping_restart_; // Variable used by the Conjugate-Gradient and L-BFGS minimizers int max_line_search_iterations_; // Armijo condition determined by this coefficient, the first of // the two Wolfe conditions Real armijo_coeff_; // Variables controlling the specific behaviour of the Conjugate // Gradient minimizer // Gradient in search direction must reduce by this amount Real cg_curvature_coeff_; // Variables controlling specific behaviour of L-BFGS minimizer // Gradient in search direction must reduce by this amount Real lbfgs_curvature_coeff_; // Number of prevous states to store int lbfgs_n_states_; // Variables set during the running of an algorithm and available // to the user afterwards // Number of iterations that successfully reduced the cost function int n_iterations_; // Number of calculations of the cost function int n_samples_; Real start_cost_function_; Real cost_function_; Real gradient_norm_; MinimizerStatus status_; }; // Implement inline member functions // Functions to set parameters defining the behaviour of the // Levenberg and Levenberg-Marquardt algorithm inline void Minimizer::set_levenberg_damping_limits(Real damp_min, Real 
damp_max) { if (damp_min <= 0.0) { throw optimization_exception("Minimum damping factor in Levenberg-Marquardt algorithm must be positive"); } else if (damp_max <= damp_min) { throw optimization_exception("Maximum damping factor must be greater than minimum in Levenberg-Marquardt algorithm"); } levenberg_damping_min_ = damp_min; levenberg_damping_max_ = damp_max; } inline void Minimizer::set_levenberg_damping_start(Real damp_start) { if (damp_start < 0.0) { throw optimization_exception("Start damping factor in Levenberg-Marquardt algorithm must be positive or zero"); } levenberg_damping_start_ = damp_start; } inline void Minimizer::set_levenberg_damping_restart(Real damp_restart) { if (damp_restart <= 0.0) { throw optimization_exception("Restart damping factor in Levenberg-Marquardt algorithm must be positive"); } levenberg_damping_restart_ = damp_restart; } inline void Minimizer::set_levenberg_damping_multiplier(Real damp_multiply, Real damp_divide) { if (damp_multiply <= 1.0 || damp_divide <= 1.0) { throw optimization_exception("Damping multipliers in Levenberg-Marquardt algorithm must be greater than one"); } levenberg_damping_multiplier_ = damp_multiply; levenberg_damping_divider_ = damp_divide; } }; #endif ================================================ FILE: include/adept/Optimizable.h ================================================ /* Optimizable.h -- abstract base classes representing an optimization problem Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptOptimizable_H #define AdeptOptimizable_H 1 #include namespace adept { // A class representing an optimization problem that can be solved // by Adept's Minimizer class. The user should define their own // class that publicly inherits from Optimizable and overrides the // member functions calc_cost_function and provides_derivative. 
// This is the minimum requirement to use in gradient-free
// minimization algorithms (e.g. Nelder-Mead). To use in
// quasi-Newton and conjugate-gradient minimization algorithms, the
// user should also override the member function
// calc_cost_function_gradient. To use in Newton-type minimization
// algorithms such as Gauss-Newton and Levenberg-Marquardt, the user
// should also override the member function
// calc_cost_function_gradient_hessian. The user may optionally
// override report_progress.
class Optimizable {

public:

  // Virtual destructor, since this class is intended to be used as
  // a polymorphic base
  virtual ~Optimizable() { }

  // Return the cost function corresponding to the state vector x.
  // Pure virtual: every child class must implement it.
  virtual Real calc_cost_function(const adept::Vector& x) = 0;

  // Return the cost function corresponding to the state vector x,
  // and also set the "gradient" argument to the gradient of the
  // cost function with respect to each element of x. The default
  // implementation throws optimization_exception.
  // NOTE(review): "gradient" is an output yet is passed by value;
  // presumably a copied adept::Vector refers to the same data as
  // the original - confirm against the Array documentation.
  virtual Real calc_cost_function_gradient(const adept::Vector& x,
                                           adept::Vector gradient) {
    // If we get here then a gradient-based minimizer has been
    // applied to this class but the user has not implemented a
    // function to compute the gradient.
    throw optimization_exception("Gradient calculation has not been implemented");
  }

  // Return the cost function corresponding to the state vector x,
  // and set the "gradient" argument to the gradient of the cost
  // function with respect to each element of x, and "hessian" to
  // the second derivative of the cost function with respect to x.
  // The default implementation throws optimization_exception.
  virtual Real calc_cost_function_gradient_hessian(const adept::Vector& x,
                                                   adept::Vector gradient,
                                                   adept::SymmMatrix& hessian) {
    // If we get here then a Newton-type minimizer has been applied
    // to this class but the user has not implemented a function to
    // compute the Hessian matrix.
    throw optimization_exception("Hessian calculation has not been implemented");
  }

  // This function is called at every iteration, and can be
  // overridden by child classes to report or store the progress at
  // each iteration, if required. By default it does nothing.
  virtual void report_progress(int niter, const adept::Vector& x,
                               Real cost, Real gnorm) { }

  // Child classes should override this function to provide a
  // run-time mechanism to check which of the first and second
  // derivative (i.e. gradient and Hessian, respectively) are
  // available. If only the gradient is available then it could be
  // implemented as: if (order == 0 || order == 1) { return true; }
  // else { return false; }
  virtual bool provides_derivative(int order) = 0;

};

};

#endif

================================================
FILE: include/adept/Packet.h
================================================
/* Packet.h -- Vectorization support

    Copyright (C) 2016-2020 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

   A Packet contains a short vector of values, and when it is used in
   a limited set of arithmetic operations, the appropriate vector
   instructions will be used. For example if your hardware and
   compiler support SSE2 then Packet<float> is a vector of 4x 4-byte
   floats while Packet<double> is a vector of 2x 8-byte floats.
   This header file also provides for allocating aligned data

*/

#ifndef AdeptPacket_H
#define AdeptPacket_H 1

// NOTE(review): in this copy of the file the text that was inside
// angle brackets (header names on #include lines, template parameter
// lists and template arguments) appears to be missing throughout -
// restore it from the original source before compiling.
#include
#include
#include

// Headers needed for allocation of aligned memory
#include

#ifdef __unix__
#include // Defines _POSIX_VERSION
#endif
#include
#ifdef _MSC_VER
#include // Provides _aligned_malloc on Windows
#endif

#include
#include

// -------------------------------------------------------------------
// Determine how many floating point values will be held in a packet
// -------------------------------------------------------------------

// Both sizes may be predefined by the user; otherwise they take the
// values of the QE_LONGEST_*_PACKET macros
#ifndef ADEPT_FLOAT_PACKET_SIZE
#define ADEPT_FLOAT_PACKET_SIZE QE_LONGEST_FLOAT_PACKET
//static const int ADEPT_FLOAT_PACKET_SIZE = quick_e::longest_packet::size;
#endif
#ifndef ADEPT_DOUBLE_PACKET_SIZE
#define ADEPT_DOUBLE_PACKET_SIZE QE_LONGEST_DOUBLE_PACKET
//static const int ADEPT_DOUBLE_PACKET_SIZE = quick_e::longest_packet::size
#endif

// -------------------------------------------------------------------
// Determine how many floating point values will be held in packet of Real
// -------------------------------------------------------------------

// A Real of 4 bytes uses the float packet size, a Real of 8 bytes
// uses the double packet size, and any other size is not vectorized
#if ADEPT_REAL_TYPE_SIZE == 4
#define ADEPT_REAL_PACKET_SIZE ADEPT_FLOAT_PACKET_SIZE
#elif ADEPT_REAL_TYPE_SIZE == 8
#define ADEPT_REAL_PACKET_SIZE ADEPT_DOUBLE_PACKET_SIZE
#else
#define ADEPT_REAL_PACKET_SIZE 1
#endif

namespace adept {

namespace internal {

// Trait to define packet size: 1 in the general case, with two
// explicit specializations (presumably for float and double, judging
// by the macros they use - confirm)
template
struct packet_traits {
  static const int size = 1;
};
template <>
struct packet_traits {
  static const int size = ADEPT_FLOAT_PACKET_SIZE;
};
template <>
struct packet_traits {
  static const int size = ADEPT_DOUBLE_PACKET_SIZE;
};

// -------------------------------------------------------------------
// Define packet type
// -------------------------------------------------------------------

// Unfortunately, with C++98, unions cannot contain std::complex
// because it has a constructor... therefore Packet inherits from
// PacketData to contain the data in order that union is only used
// for Packets of types that are actually vectorized (which are
// floats and doubles).

// General case: the intrinsic vector "data" shares storage, via an
// anonymous union, with an array of "size" scalars so that
// individual elements can be accessed with operator[]
template
struct PacketData {
  // Static definitions
  static const int size = packet_traits::size;
  typedef typename quick_e::packet::type intrinsic_type;
  PacketData(intrinsic_type d) : data(d) { }
  union {
    intrinsic_type data;
    T value_[size];
  };
  // Return the first element
  T value() const { return value_[0]; }
  T& operator[](int i) { return value_[i]; }
  const T& operator[](int i) const { return value_[i]; }
};

// Specialization for a packet size of 1: a single scalar is stored,
// no union is used, and operator[] ignores its index argument
template
struct PacketData::size == 1>::type> {
  // Static definitions
  static const int size = 1;
  typedef T intrinsic_type;
  PacketData(intrinsic_type d) : data(d) { }
  T data;
  T value() const { return data; }
  T& operator[](int i) { return data; }
  const T& operator[](int i) const { return data; }
};

// A short vector of values on which arithmetic is performed by
// calling the corresponding quick_e functions on the stored "data"
template
struct Packet : public PacketData {
  using PacketData::data;
  static const int size = packet_traits::size;
  typedef typename quick_e::packet::type intrinsic_type;
  //    static const int intrinsic_size = 1; // What is this for?
  // Alignment is the size in bytes of the underlying intrinsic type
  static const std::size_t alignment_bytes = sizeof(intrinsic_type);
  // T=float/double -> all bits = 1
  // (for a packet of size 1 the mask is -1 converted to std::size_t,
  // otherwise it is alignment_bytes-1)
  static const std::size_t align_mask = (size == 1) ? -1 : alignment_bytes-1;
  static const bool is_vectorized = (size > 1);

  // Constructors
  Packet() : PacketData(quick_e::set0()) { }
  Packet(const Packet& d) : PacketData(d.data) { }
  template
  Packet(TT d, typename enable_if::value,int>::type = 0) : PacketData(d) { }
  // Load a packet from the memory pointed to by d
  explicit Packet(const T* d) : PacketData(quick_e::load(d)) { }
  //    explicit Packet(T d) : PacketData(quick_e::set1(d)) { }
  template
  explicit Packet(TT d, typename enable_if::value&&is_vectorized,int>::type = 0) : PacketData(quick_e::set1(d)) { }

  // Member functions

  // Store the packet to memory: "put" calls quick_e::store and
  // "put_unaligned" calls quick_e::storeu
  void put(T* __restrict d) const { quick_e::store(d, data); }
  void put_unaligned(T* __restrict d) const { quick_e::storeu(d, data); }
  //    void operator=(T d) { data = quick_e::set1(d); }
  template //, typename enable_if::value||is_same::value,int>::type = 0>
  void operator=(TT d) { data = quick_e::set1(d); }
  //    void operator=(intrinsic_type d) { data = d; }
  void operator=(const Packet& d) { data = d.data; }
  void operator+=(const Packet& d) { data = quick_e::add(data, d.data); }
  void operator-=(const Packet& d) { data = quick_e::sub(data, d.data); }
  void operator*=(const Packet& d) { data = quick_e::mul(data, d.data); }
  void operator/=(const Packet& d) { data = quick_e::div(data, d.data); }
  Packet operator-() const { return quick_e::neg(data); }
  Packet operator+() const { return *this; }
};

// Argument type used by the free functions below
//#define QE_PACKET_ARG Packet
#define QE_PACKET_ARG const Packet& __restrict

// Default functions: each forwards to the quick_e function of the
// same purpose, applied to the "data" member of its arguments
template
Packet operator+(QE_PACKET_ARG x, QE_PACKET_ARG y) { return quick_e::add(x.data,y.data); }
template
Packet operator-(QE_PACKET_ARG x, QE_PACKET_ARG y) { return quick_e::sub(x.data,y.data); }
template
Packet operator*(QE_PACKET_ARG x, QE_PACKET_ARG y) { return quick_e::mul(x.data,y.data); }
template
Packet operator/(QE_PACKET_ARG x, QE_PACKET_ARG y) { return quick_e::div(x.data,y.data); }
template
Packet fmin(QE_PACKET_ARG x, QE_PACKET_ARG y) { return quick_e::fmin(x.data,y.data); }
template
Packet fmax(QE_PACKET_ARG x, QE_PACKET_ARG y) { return quick_e::fmax(x.data,y.data); }
// Both std::sqrt and quick_e::sqrt are brought into scope so that
// overload resolution selects the one matching the type of x.data
template
Packet sqrt(QE_PACKET_ARG x) {
  using std::sqrt;
  using quick_e::sqrt;
  return sqrt(x.data);
}
template
Packet fastexp(QE_PACKET_ARG x) { return quick_e::exp(x.data); }

// exp on packets uses quick_e::exp only if ADEPT_FAST_EXPONENTIAL is
// defined, otherwise std::exp
#ifdef ADEPT_FAST_EXPONENTIAL
template
Packet exp(QE_PACKET_ARG x) { return quick_e::exp(x.data); }
#else
template
Packet exp(QE_PACKET_ARG x) { return std::exp(x.data); }
#endif

// Horizontal reductions of a packet to a single value
template
T hsum(QE_PACKET_ARG x) { return quick_e::hsum(x.data); }
template
T hprod(QE_PACKET_ARG x) { return quick_e::hmul(x.data); }
template
T hmin(QE_PACKET_ARG x) { return quick_e::hmin(x.data); }
template
T hmax(QE_PACKET_ARG x) { return quick_e::hmax(x.data); }

// Write the elements of a packet to a stream in the form "{ a b c}"
template
std::ostream& operator<<(std::ostream& os, QE_PACKET_ARG x) {
  os << "{";
  for (int i = 0; i < Packet::size; ++i) {
    os << " " << x[i];
  }
  os << "}";
  return os;
}

// -------------------------------------------------------------------
// Aligned allocation and freeing of memory
// -------------------------------------------------------------------

// Allocate an array of n objects of type Type, aligned to the packet
// alignment where the platform provides a means of doing so. Throws
// std::bad_alloc if an aligned allocation fails. Memory returned by
// this function must be released with free_aligned.
template
inline
Type* alloc_aligned(Index n) {
  std::size_t n_align = Packet::alignment_bytes;
  if (n_align < sizeof(void*)) {
    // Note that the requested byte alignment passed to
    // posix_memalign must be at least sizeof(void*)
    return new Type[n];
  }
  else {
    Type* result;
#ifdef _POSIX_VERSION
#if _POSIX_VERSION >= 200112L
    if (posix_memalign(reinterpret_cast(&result),
                       n_align, n*sizeof(Type)) != 0) {
      throw std::bad_alloc();
    }
#else
    // Older POSIX version: fall back to ordinary new[]
    result = new Type[n];
#endif
#elif defined(_MSC_VER)
    result = reinterpret_cast(_aligned_malloc(n*sizeof(Type),
                                              n_align));
    if (result == 0) {
      throw std::bad_alloc();
    }
#else
    // No aligned allocator known for this platform
    result = new Type[n];
#endif
    return result;
  }
}

// Release memory obtained from alloc_aligned
template
inline
void free_aligned(Type* data) {
  // Note that we need to use the same condition as used in
  // alloc_aligned() in order that new[] is followed by delete[]
  // and posix_memalign is followed by free
  if (Packet::alignment_bytes < sizeof(void*)) {
    delete[] data;
  }
  else {
#ifdef _POSIX_VERSION
#if _POSIX_VERSION >= 200112L
    free(data);
#else
    delete[] data;
#endif
#elif defined(_MSC_VER)
    _aligned_free(data);
#else
    delete[] data;
#endif
  }
}

// -------------------------------------------------------------------
// Check if templated object is a packet: is_packet
// -------------------------------------------------------------------

// "value" is false in the general case and true in the partial
// specialization
template
struct is_packet {
  static const bool value = false;
};
template
struct is_packet > {
  static const bool value = true;
};

} // End namespace internal

// -------------------------------------------------------------------
// Fast exponential function
// -------------------------------------------------------------------

#ifdef ADEPT_FAST_SCALAR_EXPONENTIAL
// Bring scalar exp from quick_e into this namespace
inline float exp(float x) { return quick_e::exp(x); }
inline double exp(double x) { return quick_e::exp(x); }
#endif

// fastexp always calls quick_e::exp, whichever macros are defined
inline float fastexp(float x) { return quick_e::exp(x); }
inline double fastexp(double x) { return quick_e::exp(x); }

// This namespace is only for use in array operations
namespace functions {
#ifdef ADEPT_FAST_EXPONENTIAL
// Bring scalar exp from quick_e into this namespace
inline float exp(float x) { return quick_e::exp(x); }
inline double exp(double x) { return quick_e::exp(x); }
#else
inline float exp(float x) { return std::exp(x); }
inline double exp(double x) { return std::exp(x); }
#endif
}

} // End namespace adept

#endif

================================================
FILE: include/adept/RangeIndex.h
================================================
/* RangeIndex.h -- Helper classes to enable indexing of arrays

    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

   If an Array is indexed via A(i,j,...)
   then there are three possible return values: (1) a scalar, if all
   indices are scalar integers (including 0-rank expressions such as
   "end"); (2) an Array that links to a subset of the data in the
   original Array, if one or more of the indices is a RangeIndex
   object and all the rest are scalar integers; and (3) an
   IndexedArray object, if one or more of the indices is a vector of
   integers. All of these return values can be used on the
   left-hand-side of an expression.

   This file defines the RangeIndex class and associated helper types
   that facilitate the second case. A RangeIndex object expresses a
   sequence of regularly spaced integers, which may have a separation
   greater than 1 or a negative separation. Since an Array need not
   be contiguous in memory, when an Array is indexed by one or more
   RangeIndex objects the result is also a valid Array. RangeIndex
   objects are created by the range(begin,end) and
   stride(begin,end,stride) functions.

   This file also includes the EndIndex class to enable the use of
   "end" to express the final element of an array dimension being
   indexed (as in Matlab), and the AllIndex class to enable the use
   of "__" to express all elements of a dimension (as ":" in Fortran
   90 and Matlab).

*/

#ifndef AdeptRangeIndex_H
#define AdeptRangeIndex_H 1

// NOTE(review): in this copy of the file the text that was inside
// angle brackets (the header name on the #include below, template
// parameter lists and most template arguments) appears to be missing
// - restore it from the original source before compiling.
#include

namespace adept {

namespace internal {

// ---------------------------------------------------------------------
// Section 1. EndIndex: enable Matlab-like "end" indexing
// ---------------------------------------------------------------------

// When an integer Expression is used as the index to another
// expression, make "end" (or "adept::end") be interpreted as the
// index of the final element of the array dimension being
// referred to. If a whole multi-dimensional array is referred to
// by a single integer Expression, then "end" is resolved to the
// len-1 ("len" being the length of the dimension being indexed).
// "end" is actually an instantiation of the "EndIndex" class, a
// rank-0 expression.
struct EndIndex : public Expression {
  // Static definitions
  static const int  rank       = 0;
  static const bool is_active  = false;
  static const int  n_scratch  = 0;
  static const int  n_arrays   = 0;
  static const int  n_active   = 0;

  // Functions to implement Expression behaviour

  // A rank-0 expression has no dimensions to report, so "dim" is
  // left untouched
  bool get_dimensions_(ExpressionSize<0>& dim) const { return true; }

  std::string expression_string_() const { return std::string("end"); }

  // "end" refers to no memory, so it is never aliased
  bool is_aliased_(const Index* mem1, const Index* mem2) const { return false; }

  // Resolve "end" to the final valid index of a dimension of length
  // "len"; the element index "j" is not used
  Index value_with_len_(const Index& j, const Index& len) const
  { return len-1; }

  // Note that "end" can only be used as an index to an array or
  // expression: when used in any other context it will fail.
  template
  Index value_at_location_(const ExpressionSize&) const {
    throw array_exception("Cannot determine to which object the \"end\" index refers to"
                          ADEPT_EXCEPTION_LOCATION);
  }

};

// ---------------------------------------------------------------------
// Section 2. get_index_with_len
// ---------------------------------------------------------------------

// We want range(x,y) and stride(x,y,z) to work for integer
// arguments or for 0-rank expressions (including "end" and
// constructs such as "end - 1"), so define the following helper
// function. For an integer first argument, "get_index_with_len"
// just returns the first argument, but for 0-rank expressions of
// integer type, the second argument "len" is passed in and if the
// expression contains an "end" then this resolves to len-1.
#ifndef ADEPT_BOUNDS_CHECKING inline Index get_index_with_len(Index j, Index) { return j; } template inline typename enable_if::is_integer && E::rank == 0, Index>::type get_index_with_len(const Expression& j, Index len) { return j.value_with_len(0, len); } #else // Bounds-checking versions inline Index get_index_with_len(Index j, Index len) { if (j < 0 || j >= len) { throw index_out_of_bounds(); } else { return j; } } template inline typename enable_if::is_integer && E::rank == 0, Index>::type get_index_with_len(const Expression& j, Index len) { Index ind = j.value_with_len(0, len); if (ind < 0 || ind >= len) { throw index_out_of_bounds("Array index (probably generated from a scalar expression containing \"end\") is out of bounds" ADEPT_EXCEPTION_LOCATION); } else { return ind; } } #endif // get_stride_with_len is just like get_index_with_len except that // there is no need to do bounds checking inline Index get_stride_with_len(Index j, Index) { return j; } template inline typename enable_if::is_integer && E::rank == 0, Index>::type get_stride_with_len(const Expression& j, Index len) { return j.value_with_len(0, len); } // --------------------------------------------------------------------- // Section 3. get_value // --------------------------------------------------------------------- // If a RangeIndex object is not to be used as an index to an // array, we may wish to access its elements without consideration // of the length of a dimension. inline Index get_value(Index j) { return j; } template inline typename enable_if::is_integer && E::rank == 0, Index>::type get_value(const Expression& j) { return j.scalar_value(); } // --------------------------------------------------------------------- // Section 3. RangeIndex class // --------------------------------------------------------------------- // A class to store a range of integers, optionally with a fixed // stride, for simple indexing of arrays. 
// NOTE(review): in this copy of the file the template parameter
// lists and most template arguments appear to be missing from the
// declarations below - restore them from the original source before
// compiling.
template
class RangeIndex
  : public Expression > {

public:
  static const int  rank       = 1;
  static const bool is_active  = false;
  static const int  n_scratch  = 0;
  static const int  n_arrays   = 1;
  static const int  n_active   = 0;

  // Construct with a specified stride
  RangeIndex(const BeginType& begin, const EndType& end,
             const StrideType& stride)
    : begin_(begin), end_(end), stride_(stride) { };

  // Construct without a specified stride: defaults to 1
  RangeIndex(const BeginType& begin, const EndType& end)
    : begin_(begin), end_(end), stride_(1) { };

  // Number of integers in the range, treating "end" as inclusive:
  // (end - begin + stride) / stride using integer division. This
  // version evaluates begin, end and stride without a dimension
  // length.
  Index size() const
  { return (end() - begin() + stride()) / stride(); }

  // As size(), but with begin, end and stride evaluated for a
  // dimension of length "len"
  Index size_with_len_(const Index& len) const
  { return (end(len) - begin(len) + stride(len)) / stride(len); }

  bool get_dimensions_(ExpressionSize<1>& dim) const
  { dim[0] = size(); return true; }

  // Describe the range as "(begin:end)", or "(begin:end:stride)" if
  // the stride is not 1
  std::string expression_string_() const {
    std::stringstream s;
    s << "(" << begin() << ":" << end();
    Index str = stride();
    if (str != 1) {
      s << ":" << str;
    }
    s << ")";
    return s.str();
  }

  bool is_aliased_(const Index* mem1, const Index* mem2) const { return false; }

  bool all_arrays_contiguous_() const { return true; }

  // When this object is used as an index to another, the
  // following version of the function is called, in which the
  // "len" element is specified in order for the "end" index
  // specifier to work
  Index value_with_len_(const Index&j, const Index& len) const
  { return begin(len) + stride(len)*j; }

  // Advance the location of each array in the expression
  template
  void advance_location_(ExpressionSize& loc) const {
    ++loc[MyArrayNum];
  }

  // Nothing to do: the body is empty
  template
  void set_location_(const ExpressionSize<1>& i,
                     ExpressionSize& index) const { }

  // Give the value at a particular offset
  template
  Index value_at_location_(const ExpressionSize& j) const {
    return begin() + stride()*j[MyArrayNum];
  }

  // Access the beginning, end and stride, where the argument
  // gives the length of the dimension in case any of these is
  // expressed with respect to "end" (which resolves to length-1)
  Index begin() const { return get_value(begin_); }
  Index end() const { return get_value(end_); }
  Index stride() const { return get_value(stride_); }
  Index begin(Index len) const { return get_index_with_len(begin_, len); }
  Index end(Index len) const { return get_index_with_len(end_, len); }
  Index stride(Index len) const { return get_stride_with_len(stride_, len); }

private:
  // Note that a copy rather than a reference to the Expression or
  // int is stored: this is because if range(i1, i2) is used as
  // the index to another object, then a temporary object will be
  // created that will be destroyed immediately after calling the
  // RangeIndex constructor (following ANSI C++ rules), so a
  // reference would then point to invalid data.
  // FIX!!!
  const BeginType  begin_;
  const EndType    end_;
  const StrideType stride_;
};

// ---------------------------------------------------------------------
// Section 4. AllIndex class
// ---------------------------------------------------------------------

// A class to represent all elements along one dimension, for simple
// indexing of arrays with "__" (equivalent to ":" in Fortran).
class AllIndex : public Expression {
public:
  static const int  rank       = 1;
  static const bool is_active  = false;
  static const int  n_active   = 0;
  // NOTE(review): the other expression classes in this file define
  // "n_scratch" where this one defines "n_static_" - confirm whether
  // the name is intended
  static const int  n_static_  = 0;
  static const int  n_arrays   = 0;

  // Unknown!
  //    bool get_dimensions_(ExpressionSize<1>& dim) const { return true; }

  std::string expression_string_() const { return std::string("__"); }

  bool is_aliased_(const Index* mem1, const Index* mem2) const { return false; }

  // All "len" elements of the dimension are selected
  Index size_with_len_(const Index& len) const { return len; }

  // Element j of "__" is simply j
  Index value_with_len_(const Index& j, const Index& len) const { return j; }

  Index value_at_location_(const ExpressionSize<1>& loc) const { return loc[0]; }

  // The range always starts at 0 with a stride of 1, whatever the
  // length, and ends at the final element len-1
  Index begin(Index len = -1) const { return 0; }
  Index end(Index len) const { return len-1; }
  Index stride(Index len = -1) const { return 1; }
};

// is_range::value is true if T is of type RangeIndex or
// AllIndex; "count" is 1 in those cases and 0 otherwise
template
struct is_range {
  static const bool value = false;
  static const int  count = 0;
};
template <>
struct is_range {
  static const bool value = true;
  static const int  count = 1;
};
template
struct is_range > {
  static const bool value = true;
  static const int  count = 1;
};

// is_regular_index::value is true if T is a valid index to a
// dimension of an Array such that the indexed object is also an
// Array
template
struct is_regular_index {
  static const bool value = (is_scalar_int::value
                             || is_null_type::value
                             || is_range::value);
};

// is_ranged<>::value is true if at least one of the template
// arguments I0 to I[Rank-1] is of type RangeIndex, and all others
// are of integer type. "count" is the number of arguments that are
// ranges.
template
struct is_ranged {
  static const bool value
    = (is_range::value || is_range::value
       || is_range::value || is_range::value
       || is_range::value || is_range::value
       || is_range::value)
    && Rank == 7 - (  is_null_type::count + is_null_type::count
                    + is_null_type::count + is_null_type::count
                    + is_null_type::count + is_null_type::count)
    && (   is_regular_index::value && is_regular_index::value
        && is_regular_index::value && is_regular_index::value
        && is_regular_index::value && is_regular_index::value
        && is_regular_index::value);
  static const int count = is_range::count + is_range::count
    + is_range::count + is_range::count
    + is_range::count + is_range::count
    + is_range::count;
};

} // End namespace internal

// User-accessible functions and objects

// The actual end object is held in a source file
extern ::adept::internal::EndIndex end;

// The actual "__" object is held in a source file
extern ::adept::internal::AllIndex __;

// Return a RangeIndex object representing all the integers between
// "begin" and "end"; the inputs can either be Expressions or ints
template
inline
adept::internal::RangeIndex
range(const BeginType& begin, const EndType& end) {
  return adept::internal::RangeIndex(begin, end, 1);
}

// Return a RangeIndex object representing integers between "begin"
// and "end" spaced "stride" apart
template
inline
adept::internal::RangeIndex
stride(const BeginType& begin, const EndType& end,
       const StrideType& stride) {
  return adept::internal::RangeIndex(begin, end, stride);
}

} // End namespace adept

#endif

================================================
FILE: include/adept/ScratchVector.h
================================================
/* ScratchVector.h -- Class for holding temporary real data

    Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts

    Author: Robin Hogan

    This file is part of the Adept library.

   The ScratchVector class is used to store a temporary vector of
   real numbers (by default the type "Real", but could also be
   Packet) for use in optimally evaluating an expression and
   computing its derivative. Certain parts of the expression need to
   store their numerical value when first computed since it will be
   needed again in the derivative computation. In Adept 1.x such data
   were stored in the expression objects themselves, e.g. in
   adept::Multiply, but now that it is not clear at the level of an
   individual operation whether vectorization will be possible
   (requiring Packet), the storage for such scratch data must be held
   externally.
*/

#ifndef AdeptScratchVector_H
#define AdeptScratchVector_H

// NOTE(review): the include targets and the template parameter lists
// in this file appear to have been stripped from this copy; the code
// tokens are reproduced unchanged and must be restored from upstream
// before compiling.
#include
#include
#include

namespace adept {
  namespace internal {

    // Definition of ScratchVector class: a fixed-length array of
    // "Size" elements of type "Type" with element-wise helpers
    template
    class ScratchVector {
    public:
      // Constructors

      // By default no initialization is done (unless ADEPT_INIT_REAL
      // is defined, in which case initialize() is called)
      ScratchVector() {
#ifdef ADEPT_INIT_REAL
        initialize();
#endif
      }

#ifdef ADEPT_INIT_REAL
      // Two overloads selected via enable_if: the first fills every
      // element with ADEPT_INIT_REAL, the second does nothing.
      // NOTE(review): the enable_if conditions are missing in this copy.
      template
      typename internal::enable_if::value, void>::type
      initialize() {
        for (int is = 0; is < Size; ++is) {
          val[is] = ADEPT_INIT_REAL;
        }
      }
      template
      typename internal::enable_if::value, void>::type
      initialize() { }
#endif

      // Set all dimensions to the same value
      ScratchVector(Type x) { set_all(x); }

      // Specify the values of all elements
      ScratchVector(Type x[Size]) {
        for (int i = 0; i < Size; ++i) {
          val[i] = x[i];
        }
      }

      // Assume copy constructor will copy elements of val

      // Set all to specified value
      void set_all(Type x) {
        for (int i = 0; i < Size; ++i) {
          val[i] = x;
        }
      }

      // Copy from a ScratchVector object of the same rank
      void copy(const ScratchVector& d) {
        for (int i = 0; i < Size; ++i) {
          val[i] = d[i];
        }
      }
      // ...or pointer to raw data; Size elements are read from d
      void copy(const Type* d) {
        for (int i = 0; i < Size; ++i) {
          val[i] = d[i];
        }
      }

      // Write out contents for debugging, as "{v0,v1,...}" followed
      // by a newline; val[0] is read unconditionally
      std::ostream& write(std::ostream& os) const {
        os << "{" << val[0];
        for (int i = 1; i < Size; i++) {
          os << "," << val[i];
        }
        return os << "}\n";
      }

      // Const and non-const access to elements (no bounds checking)
      Type& operator[](int i) { return val[i]; }
      const Type& operator[](int i) const { return val[i]; }

      // Data
    private:
      Type val[Size];
    };

    // Specialization for scalars (zero-rank arrays) known at compile
    // time: holds no data
    template <>
    class ScratchVector<0> {
    public:
      ScratchVector() { }
      // Accepts and ignores any initial value
      template
      ScratchVector(T x) { }
      std::ostream& write(std::ostream& os) const {
        return os << "{}\n";
      }
    };

    // Write out all elements for debugging
    template
    inline
    std::ostream& operator<<(std::ostream& os, const ScratchVector& s) {
      return s.write(os);
    }

  } // End namespace internal
} // End namespace adept

#endif //
AdeptScratchVector_H

================================================
FILE: include/adept/SpecialMatrix.h
================================================
/* SpecialMatrix.h -- Active or inactive symmetric and band-diagonal matrices

   Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts

   Author: Robin Hogan

   This file is part of the Adept library.

   The SpecialMatrix is the basis for a wide range of matrix types
   such as SquareMatrix, DiagonalMatrix, TridiagonalMatrix,
   SymmetricMatrix etc.
*/

#ifndef AdeptSpecialMatrix_H
#define AdeptSpecialMatrix_H 1

// NOTE(review): the ten include targets below appear to have been
// stripped from this copy; restore them from upstream.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace adept {

  // -------------------------------------------------------------------
  // SpecialMatrix Engine helper classes
  // -------------------------------------------------------------------

  // Selects which triangle a symmetric matrix engine stores
  enum SymmMatrixOrientation {
    ROW_LOWER_COL_UPPER=0, ROW_UPPER_COL_LOWER=1
  };

  namespace internal {

    // -------------------------------------------------------------------
    // Conventional matrix storage engine
    // -------------------------------------------------------------------

    // The SpecialMatrix class is assisted by data-free policy classes
    // that define the behaviour of different matrix types. The first
    // most basic one is for square matrices. Comments are provided
    // for the first one only to explain the meaning of each
    // function. The default here is ROW_MAJOR; the alternative
    // COL_MAJOR is provided as a specialization of this class.
    // NOTE(review): throughout these engine classes the angle-bracket
    // template parameter/argument lists appear to have been stripped
    // from this copy (e.g. after "template", "enable_if",
    // "ExpressionSize", "Active", "SquareEngine", "BandEngine"). Code
    // tokens are reproduced unchanged; restore from upstream before
    // compiling.
    template
    struct SquareEngine {
      // The number of variables to store for a SpecialMatrix when it
      // is on the right-hand-side of an expression for its location
      static const int my_n_arrays = 1;
      // Used by SpecialMatrix::expression_string() to describe the
      // matrix type
      const char* name() const { return "SquareMatrix"; }
      // Used by SpecialMatrix::info_string() to describe the matrix
      // type
      std::string long_name() const { return "SquareMatrix"; }
      // The offset to use (the spacing in memory of elements along
      // the slowest varying dimension) for "packed" data, i.e. when
      // this matrix is created by the SpecialMatrix::resize function
      // rather than being a submatrix to something larger.
      Index pack_offset(Index dim) const { return dim; }
      // Provide the memory index to the element at row i, column j
      Index index(Index i, Index j, Index offset) const {
        return i*offset + j;
      }
      // When traversing along a row, this is the separation in memory
      // of each element
      template
      Index row_offset(Index offset, const ExpressionSize& loc) const {
        return 1;
      }
      // This function is used when a SpecialMatrix is used on the
      // left-hand-side of an expression. For row i, return the range
      // of columns containing unique elements in j_start and
      // j_end_plus_1, the memory location of the element
      // corresponding to j_start in index_start, and the separation
      // in memory of consecutive elements in this range
      void get_row_range(Index i, Index dim, Index offset,
                         Index& j_start, Index& j_end_plus_1,
                         Index& index_start, Index& index_stride) const {
        j_start = 0;
        j_end_plus_1 = dim;
        index_start = i*offset;
        index_stride = 1;
      }
      // Return value at row i, column j as an rvalue, first in the
      // case of an inactive array...
      template
      typename internal::enable_if::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return data[index(i,j,offset)];
      }
      // ...now in the case of an active array.
      template
      typename internal::enable_if >::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return Active(data[index(i,j,offset)]);
      }
      // Return value at row i, column j as an lvalue, first in the
      // case of an inactive array...
      template
      typename internal::enable_if::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        return data[index(i,j,offset)];
      }
      // ...now in the case of an active array.
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index ind = index(i,j,offset);
        return ActiveReference(data[ind], gradient_index+ind);
      }
      // Return the number of elements stored for a SpecialMatrix of
      // size dim x dim. This is used both by SpecialMatrix::resize
      // to know how much memory to allocate, and by
      // SpecialMatrix::is_aliased to know the memory range spanned by
      // the object.
      Index data_size(Index dim, Index offset) const {
        return (dim-1)*offset+dim;
      }
      // Memory offset of start of a superdiagonal (offdiag > 0)
      Index upper_offset(Index dim, Index offset, Index offdiag) const {
        return offdiag;
      }
      // Memory offset of start of a subdiagonal (offdiag < 0)
      Index lower_offset(Index dim, Index offset, Index offdiag) const {
        return -offdiag*offset;
      }
      // Check super- and sub-diagonals are in range, otherwise throw
      // an exception (errors only thrown for band matrices)
      void check_upper_diag(Index offdiag) const { }
      void check_lower_diag(Index offdiag) const { }

      // The type returned by the transpose .T() member function
      typedef SquareEngine transpose_engine;

      // Extra info to store when traversing a SpecialMatrix on the
      // right-hand-side of an expression
      template
      void set_extras(Index i, Index offset, ExpressionSize& index) const { }

      // Return the value at the specified location in memory
      template
      Type value_at_location(const Type* data, const ExpressionSize& loc) const {
        return data[loc[MyArrayNum]];
      }
      // Push an element of an active SpecialMatrix onto the stack
      template
      void push_rhs(Stack& stack, Type multiplier, Index gradient_index,
                    const ExpressionSize& loc) const {
        stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]);
      }
    };

    // The engine for the SquareMatrix type using column-major
    // storage; note that this inherits from the row-major version in
    // order that functions that don't need to be changed can be
    // imported using "using".
    template <>
    struct SquareEngine : public SquareEngine {
      static const int my_n_arrays = 1;
      const char* name() const { return "SquareMatrix"; }
      std::string long_name() const { return "SquareMatrix"; }
      Index pack_offset(Index dim) const { return dim; }
      // Column-major: consecutive rows are adjacent in memory
      Index index(Index i, Index j, Index offset) const {
        return i + j*offset;
      }
      template
      Index row_offset(Index offset, const ExpressionSize& loc) const {
        return offset;
      }
      void get_row_range(Index i, Index dim, Index offset,
                         Index& j_start, Index& j_end_plus_1,
                         Index& index_start, Index& index_stride) const {
        j_start = 0;
        j_end_plus_1 = dim;
        index_start = i;
        index_stride = offset;
      }
      template
      typename internal::enable_if::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return data[index(i,j,offset)];
      }
      template
      typename internal::enable_if >::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return Active(data[index(i,j,offset)]);
      }
      template
      typename internal::enable_if::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        return data[index(i,j,offset)];
      }
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index ind = index(i,j,offset);
        return ActiveReference(data[ind], gradient_index+ind);
      }
      Index upper_offset(Index dim, Index offset, Index offdiag) const {
        return offdiag*offset;
      }
      Index lower_offset(Index dim, Index offset, Index offdiag) const {
        return -offdiag;
      }
      typedef SquareEngine transpose_engine;

      using SquareEngine::data_size;
      using SquareEngine::check_upper_diag;
      using SquareEngine::check_lower_diag;
      using SquareEngine::set_extras;
      using SquareEngine::value_at_location;
      using SquareEngine::push_rhs;
    };

    // -------------------------------------------------------------------
    // Band matrix storage engine
    // -------------------------------------------------------------------

    // A band matrix uses the BLAS packed storage to store LDiags
    // subdiagonals and UDiags superdiagonals; the default version
    // uses row-major storage

    // Helper supplying the short type name; specialized below for the
    // (0,0), (1,1) and (2,2) diagonal counts
    template
    struct BandEngineHelper {
      const char* name() const { return "BandMatrix"; }
    };
    template <>
    struct BandEngineHelper<0,0> {
      const char* name() const { return "DiagMatrix"; }
    };
    template <>
    struct BandEngineHelper<1,1> {
      const char* name() const { return "TridiagMatrix"; }
    };
    template <>
    struct BandEngineHelper<2,2> {
      const char* name() const { return "PentadiagMatrix"; }
    };

    template
    struct BandEngine {
      // Three location slots: the memory index plus the two bounds
      // written by set_extras()
      static const int my_n_arrays = 3;
      static const Index diagonals = 1+LDiags+UDiags;
      const char* name() const {
        return BandEngineHelper().name();
      }
      // NOTE(review): part of the streamed text may have been lost in
      // this copy; as written only "BandMatrix" is emitted.
      std::string long_name() const {
        std::stringstream s;
        s << "BandMatrix";
        return s.str();
      }
      Index pack_offset(Index dim) const { return diagonals-1; }
      Index index(Index i, Index j, Index offset) const {
        // return LDiags + i*offset + j;
        return i*offset + j;
      }
      template
      Index row_offset(Index offset, const ExpressionSize& loc) const {
        return 1;
      }
      // NOTE(review): the first statement below is damaged in this
      // copy (text between "i" and "dim" appears to be missing, and
      // j_end_plus_1 is never assigned as written); restore from
      // upstream.
      void get_row_range(Index i, Index dim, Index offset,
                         Index& j_start, Index& j_end_plus_1,
                         Index& index_start, Index& index_stride) const {
        j_start = idim ? dim : i+UDiags+1;
        index_start = i*offset + j_start;
        index_stride = 1;
      }

      typedef BandEngine transpose_engine;

      // Elements outside the stored band read as zero
      template
      typename internal::enable_if::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        Index off = j-i;
        Type val;
        if (off > UDiags || off < (-LDiags)) {
          val = 0;
        }
        else {
          val = data[index(i,j,offset)];
        }
        return val;
      }
      template
      typename internal::enable_if >::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        Index off = j-i;
        if (off > UDiags || off < (-LDiags)) {
          return Active(0.0);
        }
        else {
          return Active(data[index(i,j,offset)]);
        }
      }
      // Elements outside the stored band cannot be written: throws
      // index_out_of_bounds
      template
      typename internal::enable_if::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index off = j-i;
        if (off > UDiags || off < (-LDiags)) {
          throw index_out_of_bounds("Attempt to get lvalue to off-diagonal in BandMatrix"
                                    ADEPT_EXCEPTION_LOCATION);
        }
        else {
          return data[index(i,j,offset)];
        }
      }
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index off = j-i;
        if (off > UDiags || off < (-LDiags)) {
          throw index_out_of_bounds("Attempt to get lvalue to off-diagonal in BandMatrix"
                                    ADEPT_EXCEPTION_LOCATION);
        }
        else {
          Index ind = index(i,j,offset);
          return ActiveReference(data[ind], gradient_index+ind);
        }
      }
      Index data_size(Index dim, Index offset) const {
        return (dim-1)*(offset+1) + 1;// + dim; // - UDiags;
      }
      Index upper_offset(Index dim, Index offset, Index offdiag) const {
        return offdiag;
      }
      Index lower_offset(Index dim, Index offset, Index offdiag) const {
        return -offdiag*offset;
      }
      // Throw if the requested super-/sub-diagonal lies outside the
      // stored band
      void check_upper_diag(Index offdiag) const {
        if (offdiag > UDiags) {
          throw index_out_of_bounds("Attempt to get lvalue diagonal to off-diagonal in BandMatrix"
                                    ADEPT_EXCEPTION_LOCATION);
        }
      }
      void check_lower_diag(Index offdiag) const {
        if (-offdiag > LDiags) {
          throw index_out_of_bounds("Attempt to get lvalue diagonal to off-diagonal in BandMatrix"
                                    ADEPT_EXCEPTION_LOCATION);
        }
      }
      // Store the half-open range [index[MyArrayNum+1],
      // index[MyArrayNum+2]) that value_at_location() and push_rhs()
      // test the location against
      template
      void set_extras(Index i, Index offset, ExpressionSize& index) const {
        index[MyArrayNum+1] = i*(offset+1) - LDiags;
        index[MyArrayNum+2] = index[MyArrayNum+1] + diagonals;
      }
      // Returns zero for locations outside the range set by set_extras()
      template
      Type value_at_location(const Type* data, const ExpressionSize& loc) const {
        if (loc[MyArrayNum] >= loc[MyArrayNum+1]
            && loc[MyArrayNum] < loc[MyArrayNum+2]) {
          return data[loc[MyArrayNum]];
        }
        else {
          return 0;
        }
      }
      // Pushes nothing for locations outside the range set by
      // set_extras()
      template
      void push_rhs(Stack& stack, Type multiplier, Index gradient_index,
                    const ExpressionSize& loc) const {
        if (loc[MyArrayNum] >= loc[MyArrayNum+1]
            && loc[MyArrayNum] < loc[MyArrayNum+2]) {
          stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]);
        }
      }
    };

    // The column-major version inherits from the row-major version in
    // order that some functionality can be imported
    template
    struct BandEngine : public BandEngine {
      static const int my_n_arrays = 3;
      static const Index diagonals = 1+LDiags+UDiags;
      const char* name() const {
        return BandEngineHelper().name();
      }
      // NOTE(review): part of the streamed text may have been lost in
      // this copy.
      std::string long_name() const {
        std::stringstream s;
        s << "BandMatrix";
        return s.str();
      }
      using BandEngine::pack_offset;
      Index index(Index i, Index j, Index offset) const {
        // return UDiags + i + j*offset;
        return i + j*offset;
      }
      template
      Index row_offset(Index offset, const ExpressionSize& loc) const {
        return offset;
      }
      // NOTE(review): first statement damaged in this copy, as in the
      // row-major version; restore from upstream.
      void get_row_range(Index i, Index dim, Index offset,
                         Index& j_start, Index& j_end_plus_1,
                         Index& index_start, Index& index_stride) const {
        j_start = idim ? dim : i+UDiags+1;
        index_start = i + j_start*offset;
        index_stride = offset;
      }

      typedef BandEngine transpose_engine;

      template
      typename internal::enable_if::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        Index off = j-i;
        Type val;
        if (off > UDiags || off < (-LDiags)) {
          val = 0;
        }
        else {
          val = data[index(i,j,offset)];
        }
        return val;
      }
      template
      typename internal::enable_if >::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        Index off = j-i;
        if (off > UDiags || off < (-LDiags)) {
          return Active(0.0);
        }
        else {
          return Active(data[index(i,j,offset)]);
        }
      }
      // NOTE(review): the residue of this return type (" >::type")
      // differs from the row-major inactive overload ("::type") even
      // though the body returns data[...] directly; check the
      // enable_if arguments against upstream.
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index off = j-i;
        if (off > UDiags || off < (-LDiags)) {
          throw index_out_of_bounds("Attempt to get lvalue to off-diagonal in BandMatrix"
                                    ADEPT_EXCEPTION_LOCATION);
        }
        else {
          return data[index(i,j,offset)];
        }
      }
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index off = j-i;
        if (off > UDiags || off < (-LDiags)) {
          throw index_out_of_bounds("Attempt to get lvalue to off-diagonal in BandMatrix"
                                    ADEPT_EXCEPTION_LOCATION);
        }
        else {
          Index ind = index(i,j,offset);
          return ActiveReference(data[ind], gradient_index+ind);
        }
      }
      using BandEngine::data_size;
      Index upper_offset(Index dim, Index offset, Index offdiag) const {
        // return LDiags + offdiag*offset;
        return offdiag*offset;
      }
      Index lower_offset(Index dim, Index offset, Index offdiag) const {
        // return LDiags - offdiag;
        return -offdiag;
      }
      template
      void set_extras(Index i, Index offset, ExpressionSize& index) const {
        index[MyArrayNum+1] = (i-LDiags)*(offset+1) + LDiags;
        index[MyArrayNum+2] = index[MyArrayNum+1] + (diagonals-1)*offset+1;
      }
      using BandEngine::check_upper_diag;
      using BandEngine::check_lower_diag;
      using BandEngine::value_at_location;
      using BandEngine::push_rhs;
    };

    // -------------------------------------------------------------------
    // Symmetric matrix storage engine
    // -------------------------------------------------------------------

    // A symmetric matrix - the default version (template parameter
    // ROW_LOWER_COL_UPPER) should be considered to use row-major
    // storage with the data held on the lower triangle of the
    // matrix. This is equivalent to column-major upper-triangle
    // storage for most uses, except that when this kind of symmetric
    // matrix is used on the left-hand-side of a statement, it will
    // only read the lower triangle of the right-hand-side of the
    // statement (assuming the upper triangle to be a symmetric copy).
    template
    struct SymmEngine : public SquareEngine {
      // Two location slots: the memory index plus the value written
      // by set_extras()
      static const int my_n_arrays = 2;
      const char* name() const { return "SymmMatrix"; }
      std::string long_name() const { return "SymmMatrix"; }
      // For i >= j the element is addressed directly; otherwise the
      // mirrored element is used
      Index index(Index i, Index j, Index offset) const {
        return i >= j ? i*offset + j : i + j*offset;
      }
      // Step is 1 while the location is below the value stored by
      // set_extras(), and "offset" thereafter
      template
      Index row_offset(Index offset, const ExpressionSize& loc) const {
        return loc[MyArrayNum] < loc[MyArrayNum+1] ? 1 : offset;
      }
      // Only columns 0..i of row i are assigned
      void get_row_range(Index i, Index dim, Index offset,
                         Index& j_start, Index& j_end_plus_1,
                         Index& index_start, Index& index_stride) const {
        j_start = 0;
        j_end_plus_1 = i+1;
        index_start = i*offset;
        index_stride = 1;
      }

      typedef SymmEngine transpose_engine;

      template
      typename internal::enable_if::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return data[index(i,j,offset)];
      }
      template
      typename internal::enable_if >::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return Active(data[index(i,j,offset)]);
      }
      template
      typename internal::enable_if::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        return data[index(i,j,offset)];
      }
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index ind = index(i,j,offset);
        return ActiveReference(data[ind], gradient_index+ind);
      }
      // Store i*(offset+1), which is index(i,i,offset)
      template
      void set_extras(Index i, Index offset, ExpressionSize& index) const {
        index[MyArrayNum+1] = i*(offset+1);
      }
      Index upper_offset(Index dim, Index offset, Index offdiag) const {
        return offdiag*offset;
      }
      Index lower_offset(Index dim, Index offset, Index offdiag) const {
        return -offdiag*offset;
      }

      using SquareEngine::pack_offset;
      using SquareEngine::data_size;
      using SquareEngine::check_upper_diag;
      using SquareEngine::check_lower_diag;
      using SquareEngine::value_at_location;
      using SquareEngine::push_rhs;
    };

    // A symmetric matrix whose storage can be considered to be
    // row-major with the data stored on the upper triangle. This is
    // equivalent to column-major lower-triangular storage, except
    // that when this kind of symmetric matrix is on the LHS of a
    // statement, it will only read the upper triangle of the RHS of
    // the statement.
    // NOTE(review): angle-bracket template arguments (after
    // "SymmEngine", "SquareEngine", "template", "enable_if",
    // "ExpressionSize", "Active", "ActiveReference") appear to have
    // been stripped from this copy; code tokens are reproduced
    // unchanged and must be restored from upstream before compiling.
    template <>
    struct SymmEngine : public SquareEngine {
      // Two location slots: the memory index plus the value written
      // by set_extras()
      static const int my_n_arrays = 2;
      const char* name() const { return "SymmMatrix"; }
      std::string long_name() const { return "SymmMatrix"; }
      Index pack_offset(Index dim) const { return dim; }
      // For i <= j the element is addressed directly; otherwise the
      // mirrored element is used
      Index index(Index i, Index j, Index offset) const {
        return i <= j ? i*offset + j : i + j*offset;
      }
      // Step is "offset" while the location is below the value stored
      // by set_extras(), and 1 thereafter
      template
      Index row_offset(Index offset, const ExpressionSize& loc) const {
        return loc[MyArrayNum] < loc[MyArrayNum+1] ? offset : 1;
      }
      // Only columns i..dim-1 of row i are assigned, starting at
      // memory index i*(1+offset)
      void get_row_range(Index i, Index dim, Index offset,
                         Index& j_start, Index& j_end_plus_1,
                         Index& index_start, Index& index_stride) const {
        j_start = i;
        j_end_plus_1 = dim;
        index_start = i*(1+offset);
        index_stride = 1;
      }

      typedef SymmEngine transpose_engine;

      Index upper_offset(Index dim, Index offset, Index offdiag) const {
        return offdiag;
      }
      Index lower_offset(Index dim, Index offset, Index offdiag) const {
        return -offdiag;
      }

      // Element access: two get_scalar and two get_reference
      // overloads selected through enable_if; the first of each pair
      // uses the raw data element, the second wraps it in
      // Active / ActiveReference
      template
      typename internal::enable_if::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return data[index(i,j,offset)];
      }
      template
      typename internal::enable_if >::type
      get_scalar(Index i, Index j, Index dim, Index offset,
                 Index gradient_index, const Type* data) const {
        return Active(data[index(i,j,offset)]);
      }
      template
      typename internal::enable_if::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        return data[index(i,j,offset)];
      }
      template
      typename internal::enable_if >::type
      get_reference(Index i, Index j, Index dim, Index offset,
                    Index gradient_index, Type* data) {
        Index ind = index(i,j,offset);
        return ActiveReference(data[ind], gradient_index+ind);
      }
      // Store i*(offset+1), which is index(i,i,offset)
      template
      void set_extras(Index i, Index offset, ExpressionSize& index) const {
        index[MyArrayNum+1] = i*(offset+1);
      }

      using SquareEngine::data_size;
      using SquareEngine::check_upper_diag;
      using SquareEngine::check_lower_diag;
      using SquareEngine::value_at_location;
      using SquareEngine::push_rhs;
    };

    /*
    //
------------------------------------------------------------------- // Symmetric band matrix storage engine // ------------------------------------------------------------------- */ // ------------------------------------------------------------------- // Triangular matrix storage engines // ------------------------------------------------------------------- // Forward declaration template struct UpperEngine; // Base class for common functions for row-major and column-major // storage template struct LowerBase : public SquareEngine { static const int my_n_arrays = 2; using SquareEngine::pack_offset; using SquareEngine::data_size; using SquareEngine::index; using SquareEngine::row_offset; using SquareEngine::check_lower_diag; using SquareEngine::upper_offset; using SquareEngine::lower_offset; const char* name() const { return "LowerMatrix"; } template void set_extras(Index i, Index offset, ExpressionSize& index) const { index[MyArrayNum+1] = i*(offset+1); } void check_upper_diag(Index offdiag) const { if (offdiag > 0) { throw index_out_of_bounds("Attempt to get lvalue to an upper diagonal of a lower-triangular matrix" ADEPT_EXCEPTION_LOCATION); } } template typename internal::enable_if::type get_scalar(Index i, Index j, Index dim, Index offset, Index gradient_index, const Type* data) const { if (i >= j) { return data[index(i,j,offset)]; } else { return 0; } } template typename internal::enable_if >::type get_scalar(Index i, Index j, Index dim, Index offset, Index gradient_index, const Type* data) const { if (i >= j) { return Active(data[index(i,j,offset)]); } else { return Active(0.0); } } template typename internal::enable_if::type get_reference(Index i, Index j, Index dim, Index offset, Index gradient_index, Type* data) { if (i >= j) { return data[index(i,j,offset)]; } else { throw index_out_of_bounds("Attempt to get lvalue to upper part of lower-triangular matrix" ADEPT_EXCEPTION_LOCATION); } } template typename internal::enable_if >::type get_reference(Index i, 
Index j, Index dim, Index offset, Index gradient_index, Type* data) { if (i >= j) { Index ind = index(i,j,offset); return ActiveReference(data[ind], gradient_index+ind); } else { throw index_out_of_bounds("Attempt to get lvalue to upper part of lower-triangular matrix" ADEPT_EXCEPTION_LOCATION); } } template Type value_at_location(const Type* data, const ExpressionSize& loc) const { if (loc[MyArrayNum] <= loc[MyArrayNum+1]) { return data[loc[MyArrayNum]]; } else { return 0; } } template void push_rhs(Stack& stack, Type multiplier, Index gradient_index, const ExpressionSize& loc) const { if (loc[MyArrayNum] <= loc[MyArrayNum+1]) { stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]); } } }; // Lower-triangular matrix using row-major storage template struct LowerEngine : public LowerBase { std::string long_name() const { return "LowerMatrix"; } typedef UpperEngine transpose_engine; void get_row_range(Index i, Index dim, Index offset, Index& j_start, Index& j_end_plus_1, Index& index_start, Index& index_stride) const { j_start = 0; j_end_plus_1 = i+1; index_start = i*offset; index_stride = 1; } }; // Lower-triangular matrix using column-major storage template <> struct LowerEngine : public LowerBase { std::string long_name() const { return "LowerMatrix"; } typedef UpperEngine transpose_engine; void get_row_range(Index i, Index dim, Index offset, Index& j_start, Index& j_end_plus_1, Index& index_start, Index& index_stride) const { j_start = 0; j_end_plus_1 = i+1; index_start = i; index_stride = offset; } }; // Base class for common functions for row-major and column-major // storage template struct UpperBase : public SquareEngine { static const int my_n_arrays = 2; using SquareEngine::pack_offset; using SquareEngine::data_size; using SquareEngine::index; using SquareEngine::row_offset; using SquareEngine::check_lower_diag; using SquareEngine::upper_offset; using SquareEngine::lower_offset; const char* name() const { return "UpperMatrix"; } template void 
set_extras(Index i, Index offset, ExpressionSize& index) const { index[MyArrayNum+1] = i*(offset+1); } void check_lower_diag(Index offdiag) const { if (offdiag < 0) { throw index_out_of_bounds("Attempt to get lvalue to a lower diagonal of an upper-triangular matrix" ADEPT_EXCEPTION_LOCATION); } } template typename internal::enable_if::type get_scalar(Index i, Index j, Index dim, Index offset, Index gradient_index, const Type* data) const { if (i <= j) { return data[index(i,j,offset)]; } else { return 0; } } template typename internal::enable_if >::type get_scalar(Index i, Index j, Index dim, Index offset, Index gradient_index, const Type* data) const { if (i <= j) { return Active(data[index(i,j,offset)]); } else { return Active(0.0); } } template typename internal::enable_if::type get_reference(Index i, Index j, Index dim, Index offset, Index gradient_index, Type* data) { if (i <= j) { return data[index(i,j,offset)]; } else { throw index_out_of_bounds("Attempt to get lvalue to lower part of upper-triangular matrix" ADEPT_EXCEPTION_LOCATION); } } template typename internal::enable_if >::type get_reference(Index i, Index j, Index dim, Index offset, Index gradient_index, Type* data) { if (i <= j) { Index ind = index(i,j,offset); return ActiveReference(data[ind], gradient_index+ind); } else { throw index_out_of_bounds("Attempt to get lvalue to lower part of upper-triangular matrix" ADEPT_EXCEPTION_LOCATION); } } template Type value_at_location(const Type* data, const ExpressionSize& loc) const { if (loc[MyArrayNum] >= loc[MyArrayNum+1]) { return data[loc[MyArrayNum]]; } else { return 0; } } template void push_rhs(Stack& stack, Type multiplier, Index gradient_index, const ExpressionSize& loc) const { if (loc[MyArrayNum] >= loc[MyArrayNum+1]) { stack.push_rhs(multiplier, gradient_index + loc[MyArrayNum]); } } }; // Upper-triangular matrix using row-major storage template struct UpperEngine : public UpperBase { typedef LowerEngine transpose_engine; std::string long_name() 
const { return "UpperMatrix"; } void get_row_range(Index i, Index dim, Index offset, Index& j_start, Index& j_end_plus_1, Index& index_start, Index& index_stride) const { j_start = i; j_end_plus_1 = dim; index_start = i*(offset+1); index_stride = 1; } }; // Upper-triangular matrix using column-major storage template <> struct UpperEngine : public UpperBase { typedef LowerEngine transpose_engine; std::string long_name() const { return "UpperMatrix"; } void get_row_range(Index i, Index dim, Index offset, Index& j_start, Index& j_end_plus_1, Index& index_start, Index& index_stride) const { j_start = i; j_end_plus_1 = dim; index_start = i*(offset+1); index_stride = offset; } }; } // End namespace internal // ------------------------------------------------------------------- // Definition of SpecialMatrix class // ------------------------------------------------------------------- template , bool IsActive = false> class SpecialMatrix : public Expression >, protected Engine, protected internal::GradientIndex { public: // ------------------------------------------------------------------- // SpecialMatrix: 1. Static Definitions // ------------------------------------------------------------------- // Static definitions to enable the properties of this type of // expression to be discerned at compile time static const bool is_active = IsActive; static const bool is_lvalue = true; static const int rank = 2; static const int n_active = IsActive * (1 + internal::is_complex::value); static const int n_scratch = 0; static const int n_arrays = Engine::my_n_arrays; static const bool is_vectorizable = false; // ------------------------------------------------------------------- // SpecialMatrix: 2. 
Constructors // ------------------------------------------------------------------- // Initialize an empty array SpecialMatrix() : data_(0), storage_(0), dimension_(0) { ADEPT_STATIC_ASSERT(!(std::numeric_limits::is_integer && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS); } // Initialize an array with specified size SpecialMatrix(const ExpressionSize<2>& dims) : storage_(0) { resize(dims[0], dims[1]); } SpecialMatrix(Index m0) : storage_(0) { resize(m0); } SpecialMatrix(Index m0, Index m1) : storage_(0) { resize(m0,m1); } // A way to directly create arrays, needed when subsetting // other arrays SpecialMatrix(Type* data, Storage* s, Index dim, Index offset) : data_(data), storage_(s), dimension_(dim), offset_(offset) { if (storage_) { storage_->add_link(); internal::GradientIndex::set(data_, storage_); } else { // It is an error if an active object gets here since it will // not have a valid gradient index internal::GradientIndex::assert_inactive(); } } // Similar to the above, but with the gradient index supplied explicitly, // needed when an active FixedArray is being sliced SpecialMatrix(const Type* data0, Index data_offset, Index dim, Index offset, Index gradient_index0) : internal::GradientIndex(gradient_index0, data_offset), data_(const_cast(data0)+data_offset), storage_(0), dimension_(dim), offset_(offset) { } // Initialize an array pointing at existing data: the fact that // storage_ is a null pointer is used to convey the information // that it is not necessary to deallocate the data when this array // is destructed SpecialMatrix(Type* data, Index dim) : data_(data), storage_(0), dimension_(dim), offset_(Engine::pack_offset(dim)) { ADEPT_STATIC_ASSERT(!IsActive, CANNOT_CONSTRUCT_ACTIVE_SQUARE_ARRAY_WITHOUT_GRADIENT_INDEX); } // Copy constructor: links to the source data rather than copying // it. 
This is needed because we want a function returning an // SpecialMatrix not to make a deep copy, but rather to perform a // (computationally cheaper) shallow copy; when the SpecialMatrix within // the function is destructed, it will remove its link to the // data, and the responsibility for deallocating the data will // then pass to the SpecialMatrix in the calling function. SpecialMatrix(SpecialMatrix& rhs) : internal::GradientIndex(rhs.gradient_index()), data_(rhs.data()), storage_(rhs.storage()), dimension_(rhs.dimension()), offset_(rhs.offset()) { if (storage_) storage_->add_link(); } // Copy constructor with const argument does exactly the same // thing SpecialMatrix(const SpecialMatrix& rhs) : internal::GradientIndex(rhs.gradient_index()), dimension_(rhs.dimension()), offset_(rhs.offset()) { link_(const_cast(rhs)); } private: void link_(SpecialMatrix& rhs) { data_ = const_cast(rhs.data()); storage_ = const_cast*>(rhs.storage()); if (storage_) storage_->add_link(); } public: // Initialize with an expression on the right hand side by // evaluating the expression, requiring the ranks to be equal. // Note that this constructor enables expressions to be used as // arguments to functions that expect an array - to prevent this // implicit conversion, use the "explicit" keyword. template explicit SpecialMatrix(const Expression& rhs, typename internal::enable_if::type = 0) : data_(0), storage_(0), dimension_(0) { *this = rhs; } // Destructor: if the data are stored in a Storage object then we // tell it that one fewer object is linking to it; if the number // of links to it drops to zero, it will destruct itself and // deallocate the memory. ~SpecialMatrix() { if (storage_) storage_->remove_link(); } // ------------------------------------------------------------------- // SpecialMatrix: 3. Assignment operators // ------------------------------------------------------------------- // Assignment to another matrix: copy the data... 
// Ideally we would like this to fall back to the operator=(const // Expression&) function, but if we don't define a copy assignment // operator then C++ will generate a default one :-( SpecialMatrix& operator=(const SpecialMatrix& rhs) { *this = static_cast&> (rhs); return *this; } // Assignment to an array expression of the same rank template typename internal::enable_if::type operator=(const Expression& rhs) { #ifndef ADEPT_NO_DIMENSION_CHECKING ExpressionSize<2> dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (empty()) { resize(dims[0], dims[1]); } else if (!internal::compatible(dims, dimensions())) { std::string str = "Expr"; str += dims.str() + " object assigned to " + expression_string_(); throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } #else if (empty()) { ExpressionSize<2> dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } resize(dims[0], dims[1]); } #endif if (!empty()) { #ifndef ADEPT_NO_ALIAS_CHECKING // Check for aliasing first Type const * ptr_begin; Type const * ptr_end; data_range(ptr_begin, ptr_end); if (rhs.is_aliased(ptr_begin, ptr_end)) { SpecialMatrix copy; // It would be nice to wrap noalias around rhs, but then // this leads to infinite template recursion since the "=" // operator calls the current function but with a modified // expression type. perhaps a better way would be to make // copy.assign_no_alias(rhs) work. 
copy = rhs; assign_expression_(copy); } else { #endif // Select active/passive version by delegating to a // protected function assign_expression_(rhs); #ifndef ADEPT_NO_ALIAS_CHECKING } #endif } return *this; } // Assignment to an array expression of the same rank in which the // activeness of the right-hand-side is ignored template typename internal::enable_if::type assign_inactive(const Expression& rhs) { ExpressionSize<2> dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (empty()) { resize(dims[0], dims[1]); } else if (!internal::compatible(dims, dimensions())) { std::string str = "Expr"; str += dims.str() + " object assigned to " + expression_string_(); throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } if (!empty()) { // Check for aliasing first Type const * ptr_begin; Type const * ptr_end; data_range(ptr_begin, ptr_end); if (rhs.is_aliased(ptr_begin, ptr_end)) { std::cout << "ALIASED!\n"; SpecialMatrix copy; copy.assign_inactive(rhs); // *this = copy; assign_expression_(copy); } else { assign_expression_(rhs); } } return *this; } // Assignment to a single value copies to every element template typename internal::enable_if::value, SpecialMatrix&>::type operator=(RType rhs) { if (!empty()) { assign_inactive_scalar(rhs); } return *this; } // Assign active scalar expression to an active array by first // converting the RHS to an active scalar template typename internal::enable_if::type operator=(const Expression& rhs) { Active x = rhs; *this = x; return *this; } // An active array being assigned to an active scalar template typename internal::enable_if::value && IsActive, SpecialMatrix&>::type operator=(const Active& rhs) { // If not recording we call the inactive version instead #ifdef ADEPT_RECORDING_PAUSABLE if (! 
ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar(rhs.scalar_value()); return *this; } #endif Type val = rhs.scalar_value(); Index j_start, j_end_plus_1, index, index_stride; for (Index i = 0 ; i < dimension_; ++i) { Engine::get_row_range(i, dimension_, offset_, j_start, j_end_plus_1, index, index_stride); for (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) { data_[index] = val; ADEPT_ACTIVE_STACK->push_rhs(1.0, rhs.gradient_index()); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); } } return *this; } // All the compound assignment operators are unpacked, i.e. a+=b // becomes a=a+b; first for an Expression on the rhs. We use // "noalias" sine there is no need for the entirety of the // right-hand-side of the expression to be copied before // evaluation. template SpecialMatrix& operator+=(const Expression& rhs) { return *this = (noalias(*this) + rhs); } template SpecialMatrix& operator-=(const Expression& rhs) { return *this = (noalias(*this) - rhs); } template SpecialMatrix& operator*=(const Expression& rhs) { return *this = (noalias(*this) * rhs); } template SpecialMatrix& operator/=(const Expression& rhs) { return *this = (noalias(*this) / rhs); } // And likewise for a passive scalar on the rhs template typename internal::enable_if::value, SpecialMatrix&>::type operator+=(const PType& rhs) { return *this = (noalias(*this) + rhs); } template typename internal::enable_if::value, SpecialMatrix&>::type operator-=(const PType& rhs) { return *this = (noalias(*this) - rhs); } template typename internal::enable_if::value, SpecialMatrix&>::type operator*=(const PType& rhs) { return *this = (noalias(*this) * rhs); } template typename internal::enable_if::value, SpecialMatrix&>::type operator/=(const PType& rhs) { return *this = (noalias(*this) / rhs); } // ------------------------------------------------------------------- // SpecialMatrix: 4. 
Access functions, particularly operator() // ------------------------------------------------------------------- // Get l-value of the element at the specified coordinates typename internal::active_reference::type get_lvalue(const ExpressionSize<2>& i) { return get_lvalue_(Engine::index(i[0],i[1],offset_)); } protected: template typename internal::enable_if >::type get_lvalue_(const Index& loc) { return ActiveReference(data_[loc], gradient_index()+loc); } template typename internal::enable_if::type get_lvalue_(const Index& loc) { return data_[loc]; } public: // Access individual elements of the array. Each argument must be // of integer type, or a rank-0 expression of integer type (such // as "end" or "end-3"). Inactive arrays return a reference to the // element, while active arrays return an ActiveReference // object. template typename internal::enable_if::value, typename internal::active_reference::type>::type operator()(I0 i0, I1 i1) { return Engine::template get_reference(internal::get_index_with_len(i0,dimension_), internal::get_index_with_len(i1,dimension_), dimension_, offset_, gradient_index(), data_); } template typename internal::enable_if::value, typename internal::active_scalar::type>::type operator()(I0 i0, I1 i1) const { return Engine::template get_scalar(internal::get_index_with_len(i0,dimension_), internal::get_index_with_len(i1,dimension_), dimension_, offset_, gradient_index(), data_); } /* // If one or more of the indices is not guaranteed to be monotonic // at compile time then we must return an IndexedSpecialMatrix, now done // for all possible numbers of arguments template typename internal::enable_if::value && !internal::is_ranged::value, IndexedSpecialMatrix::count, Type,IsActive,SpecialMatrix,I0,I1> >::type operator()(const I0& i0, const I1& i1) { static const int new_rank = internal::is_indexed::count; return IndexedSpecialMatrix(*this, i0, i1); } */ // diag_vector(offdiag), where A is a 2D square band matrix (including // DiagMatrix, 
TridiagMatrix etc), returns a 1D array pointing to // the "offdiag"-th diagonal of the original data, Can be used as an // lvalue. Array<1,Type,IsActive> diag_vector(Index offdiag = 0) { if (offdiag >= 0) { Engine::check_upper_diag(offdiag); ExpressionSize<1> dim(dimension_ - offdiag); ExpressionSize<1> offset(offset_+1); return Array<1,Type,IsActive>(data_ +Engine::upper_offset(dimension_,offset_,offdiag), storage_, dim, offset); } else { Engine::check_lower_diag(offdiag); ExpressionSize<1> dim(dimension_ + offdiag); ExpressionSize<1> offset(offset_+1); return Array<1,Type,IsActive>(data_ +Engine::lower_offset(dimension_,offset_,offdiag), storage_, dim, offset); } } // Extract a square sub-matrix on the diagonal SpecialMatrix submatrix_on_diagonal(Index istart, Index iend) { if (istart < 0 || istart > iend || iend >= dimension_) { throw index_out_of_bounds("Dimensions out of range in submatrix_on_diagonal" ADEPT_EXCEPTION_LOCATION); } return SpecialMatrix(data_+(offset_+1)*istart, storage_, iend-istart+1, offset_); } // FIX - add an rvalue version returning const Array (?) // Transpose as an lvalue SpecialMatrix T() { return SpecialMatrix(data_, storage_, dimension_, offset_); } // Return a SpecialMatrix that is a "soft" link to the data in the // present array; that is, it does not copy the Storage object and // increase the reference counter therein. This is useful in a // multi-threaded environment when multiple threads may wish to // subset the same array. SpecialMatrix soft_link() { return SpecialMatrix(data_,0,dimension_,offset_,gradient_index()); } const SpecialMatrix soft_link() const { return SpecialMatrix(data_,0,dimension_,offset_,gradient_index()); } // ------------------------------------------------------------------- // SpecialMatrix: 5. 
Public member functions // ------------------------------------------------------------------- // Link to an existing array of the same rank, type and activeness SpecialMatrix& link(SpecialMatrix& rhs) { if (!rhs.data()) { throw empty_array("Attempt to link to empty array" ADEPT_EXCEPTION_LOCATION); } else { clear(); data_ = rhs.data(); storage_ = rhs.storage(); dimension_ = rhs.dimension(); offset_ = rhs.offset(); if (storage_) { storage_->add_link(); } } return *this; } #ifndef ADEPT_MOVE_SEMANTICS // A common pattern is to link to a subset of another // SpecialMatrix, e.g. vec1.link(vec2(range(2,4))), but the // problem is that the argument to link is a temporary so will not // bind to SpecialMatrix&. In C++98 we therefore need a function // taking const SpecialMatrix& and then cast away the const-ness. This has // the unfortunate side effect that a non-const SpecialMatrix can be // linked to a const SpecialMatrix. SpecialMatrix& link(const SpecialMatrix& rhs) { return link(const_cast(rhs)); } #else // But in C++11 we can solve this problem and only bind to // temporary non-const SpecialMatrix SpecialMatrix& link(SpecialMatrix&& rhs) { return link(const_cast(rhs)); } #endif // Fortran-like link syntax A >>= B SpecialMatrix& operator>>=(SpecialMatrix& rhs) { return link(rhs); } #ifndef ADEPT_MOVE_SEMANTICS SpecialMatrix& operator>>=(const SpecialMatrix& rhs) { return link(const_cast(rhs)); } #else SpecialMatrix& operator>>=(SpecialMatrix&& rhs) { return link(const_cast(rhs)); } #endif // STL-like size() returns total length of array Index size() const { return dimension_*dimension_; } // Return dimensions ExpressionSize<2> dimensions() const { return ExpressionSize<2>(dimension_,dimension_); } bool get_dimensions_(ExpressionSize<2>& dim) const { dim[0] = dim[1] = dimension_; return true; } // Return individual dimension Index dimension(int j = 0) const { return dimension_; } // Return individual offset Index offset() const { return offset_; } /* // Get dimensions 
for matrix operations, treating 1D arrays as // column vectors void get_matrix_dimensions(ExpressionSize<2>& dim) const { dim[0] = dim[1] = dimension_; } */ /* // Return constant reference to offsets const ExpressionSize& offset() const { return offset_; } const Index& last_offset() const { return offset_[Rank-1]; } */ // Return true if the array is empty bool empty() const { return (dimension_ == 0); } // Return a string describing the array std::string info_string() const { std::stringstream str; str << Engine::long_name() << ", dim=" << dimension_ << ", offset=" << offset_ << ", data_location=" << data_; return str.str(); } // Return a pointer to the start of the data Type* data() { return data_; } const Type* data() const { return data_; } const Type* const_data() const { return data_; } // Older style Type* data_pointer() { return data_; } const Type* data_pointer() const { return data_; } const Type* const_data_pointer() const { return data_; } // Return a pointer to the storage object Storage* storage() { return storage_; } // Reset the array to its original empty state, removing the link // to the data (which may deallocate the data if it was the only // link) and set the dimensions to zero void clear() { if (storage_) { storage_->remove_link(); storage_ = 0; } data_ = 0; dimension_ = 0; offset_ = 0; internal::GradientIndex::clear(); } // Resize an array void resize(Index dim) { ADEPT_STATIC_ASSERT(!(std::numeric_limits::is_integer && IsActive), CANNOT_CREATE_ACTIVE_ARRAY_OF_INTEGERS); if (storage_) { storage_->remove_link(); storage_ = 0; } // Check requested dimensions if (dim < 0) { throw invalid_dimension("Negative array dimension requested" ADEPT_EXCEPTION_LOCATION); } else if (dim == 0) { clear(); } else { dimension_ = dim; offset_ = Engine::pack_offset(dim); storage_ = new Storage(Engine::data_size(dimension_,offset_), IsActive); data_ = storage_->data(); internal::GradientIndex::set(data_, storage_); } } // Resize with an ExpressionSize object void 
resize(Index dim0, Index dim1) { if (dim0 != dim1) { throw invalid_dimension("Square matrix must have the same x and y dimensions" ADEPT_EXCEPTION_LOCATION); } resize(dim0); } bool is_aliased_(const Type* mem1, const Type* mem2) const { Type const * ptr_begin; Type const * ptr_end; data_range(ptr_begin, ptr_end); if (ptr_begin <= mem2 && ptr_end >= mem1) { return true; } else { return false; } } // Cannot traverse a full row just by incrementing an index by 1 bool all_arrays_contiguous_() const { return false; } Type value_with_len_(const Index& j, const Index& len) const { ADEPT_STATIC_ASSERT(false, CANNOT_USE_VALUE_WITH_LEN_ON_ARRAY_OF_RANK_OTHER_THAN_1); return 0; } std::string expression_string_() const { std::stringstream a; a << Engine::name() << "[" << dimension_ << "," << dimension_ << "]"; return a.str(); } // The same as operator=(inactive scalar) but does not put // anything on the stack template typename internal::enable_if::value, SpecialMatrix&>::type set_value(RType x) { if (!empty()) { assign_inactive_scalar(x); } return *this; } // Is the array contiguous in memory? 
bool is_contiguous() const { return (offset_ == Engine::pack_offset(dimension_)); } // Return the gradient index for the first element in the array, // or -1 if not active Index gradient_index() const { return internal::GradientIndex::get(); } /* std::ostream& print(std::ostream& os) const { if (empty()) { os << "(empty " << Engine::name() << ")"; } else if (adept::internal::array_print_curly_brackets) { os << "\n"; for (int i = 0; i < dimension_; ++i) { if (i == 0) { os << "{{"; } else { os << " {"; } for (int j = 0; j < dimension_; ++j) { os << (*this)(i,j); if (j < dimension_-1) { os << ", "; } } os << "}"; if (i < dimension_-1) { os << ",\n"; } else { // os << "}\n"; os << "}"; } } } else { for (int i = 0; i < dimension_; ++i) { for (int j = 0; j < dimension_; ++j) { os << (*this)(i,j); if (j < dimension_-1) { os << " "; } } os << "\n"; } } return os; } */ std::ostream& print(std::ostream& os) const { const Array x(*this); x.print(os); return os; } std::ostream& print_raw(std::ostream& os) const { if (empty()) { os << "(empty " << Engine::name() << ")\n"; } else { for (Index i = 0; i < Engine::data_size(dimension_,offset_); ++i) { os << " " << data_[i]; } os << "\n"; } return os; } // Get pointers to the first and last data members in memory. void data_range(Type const * &data_begin, Type const * &data_end) const { data_begin = data_; data_end = data_ + Engine::data_size(dimension_, offset_) - 1; } // The Stack::independent(x) and Stack::dependent(y) functions add // the gradient_index of objects x and y to std::vector // objects in Stack. Since x and y may be scalars or arrays, this // is best done by delegating to the Active or Array classes. 
template void push_gradient_indices(std::vector& vec) { ADEPT_STATIC_ASSERT(IsActive, CANNOT_PUSH_GRADIENT_INDICES_FOR_INACTIVE_SPECIAL_MATRIX); Index j_start, j_end_plus_1, index, index_stride; Index gradient_ind = gradient_index(); vec.reserve(vec.size() + Engine::data_size(dimension_, offset_)); for (Index i; i < dimension_; ++i) { Engine::get_row_range(i, dimension_, offset_, j_start, j_end_plus_1, index, index_stride); for (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) { vec.push_back(gradient_ind + index); } } } // Return inactive array linked to original data SpecialMatrix inactive_link() { SpecialMatrix A; A.data_ = data_; A.storage_ = storage_; A.dimension_ = dimension_; A.offset_ = offset_; if (storage_) storage_->add_link(); return A; } // ------------------------------------------------------------------- // SpecialMatrix: 6. Member functions accessed by the Expression class // ------------------------------------------------------------------- template void set_location_(const ExpressionSize<2>& i, ExpressionSize& index) const { index[MyArrayNum] = Engine::index(i[0],i[1],offset_); Engine::template set_extras(i[0],offset_,index); } template Type value_at_location_(const ExpressionSize& loc) const { return Engine::template value_at_location(data_, loc); } Type& lvalue_at_location(const Index& loc) { return data_[loc]; } template Type value_at_location_store_(const ExpressionSize& loc, internal::ScratchVector& scratch) const { return Engine::template value_at_location(data_, loc); } template Type value_stored_(const ExpressionSize& loc, const internal::ScratchVector& scratch) const { return Engine::template value_at_location(data_, loc); } template void advance_location_(ExpressionSize& loc) const { loc[MyArrayNum] += Engine::template row_offset(offset_, loc); } // If an expression leads to calc_gradient being called on an // active object, we push the multiplier and the gradient index on // to the operation stack (or 1.0 if no 
multiplier is specified template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch) const { Engine::template push_rhs(stack, static_cast(1.0), gradient_index(), loc); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const internal::ScratchVector& scratch, const MyType& multiplier) const { Engine::template push_rhs(stack, multiplier, gradient_index(), loc); } // ------------------------------------------------------------------- // SpecialMatrix: 7. Protected member functions // ------------------------------------------------------------------- protected: // When assigning a scalar to a whole array, there may be // advantage in specialist behaviour depending on the rank of the // array. This is a generic one that copies the number but treats // the present array as passive. template typename internal::enable_if::type assign_inactive_scalar(X x) { Index j_start, j_end_plus_1, index, index_stride; for (Index i = 0 ; i < dimension_; ++i) { Engine::get_row_range(i, dimension_, offset_, j_start, j_end_plus_1, index, index_stride); for (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) { data_[index] = x; } } } // An active array being assigned the value of an inactive scalar template typename internal::enable_if::type assign_inactive_scalar(X x) { // If not recording we call the inactive version instead #ifdef ADEPT_RECORDING_PAUSABLE if (! 
ADEPT_ACTIVE_STACK->is_recording()) { assign_inactive_scalar(x); return; } #endif Index j_start, j_end_plus_1, index, index_stride; for (Index i = 0 ; i < dimension_; ++i) { Engine::get_row_range(i, dimension_, offset_, j_start, j_end_plus_1, index, index_stride); ADEPT_ACTIVE_STACK->push_lhs_range(gradient_index()+index, j_end_plus_1-j_start, index_stride); for (Index j = j_start; j < j_end_plus_1; ++j, index += index_stride) { data_[index] = x; } } } // When copying an expression to a whole array, there may be // advantage in specialist behaviour depending on the rank of the // array template typename internal::enable_if::type assign_expression_(const E& rhs) { ADEPT_STATIC_ASSERT(!EIsActive, CANNOT_ASSIGN_ACTIVE_EXPRESSION_TO_INACTIVE_ARRAY); ExpressionSize<2> i(0); ExpressionSize::n_arrays> ind(0); Index j_start, j_end_plus_1, index, index_stride; for ( ; i[0] < dimension_; ++i[0]) { Engine::get_row_range(i[0], dimension_, offset_, j_start, j_end_plus_1, index, index_stride); i[1] = j_start; rhs.set_location(i, ind); for (i[1] = j_start; i[1] < j_end_plus_1; ++i[1], index += index_stride) { data_[index] = rhs.next_value(ind); } } } template typename internal::enable_if::type assign_expression_(const E& rhs) { // If recording has been paused then call the inactive version #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { assign_expression_(rhs); return; } #endif ExpressionSize<2> i(0); ExpressionSize::n_arrays> ind(0); ADEPT_ACTIVE_STACK->check_space(internal::expr_cast::n_active * size()); Index j_start, j_end_plus_1, index, index_stride; for ( ; i[0] < dimension_; ++i[0]) { Engine::get_row_range(i[0], dimension_, offset_, j_start, j_end_plus_1, index, index_stride); i[1] = j_start; rhs.set_location(i, ind); for (i[1] = j_start; i[1] < j_end_plus_1; ++i[1], index += index_stride) { data_[index] = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, ind); ADEPT_ACTIVE_STACK->push_lhs(gradient_index()+index); } } } // 
------------------------------------------------------------------- // SpecialMatrix: 8. Data // ------------------------------------------------------------------- protected: Type* data_; // Pointer to values Storage* storage_; // Pointer to Storage object Index dimension_; // Size of each dimension Index offset_; // Memory offset for // slowest-varying dimension }; // End of SpecialMatrix class // ------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------- // Print array on a stream template inline std::ostream& operator<<(std::ostream& os, const SpecialMatrix& A) { return A.print(os); } // Extract inactive part of array, working correctly depending on // whether argument is active or inactive template inline SpecialMatrix& value(SpecialMatrix& expr) { return expr; } template inline SpecialMatrix value(SpecialMatrix& expr) { return expr.inactive_link(); } // Array::diag_matrix(), where Array is a 1D array, returns a // DiagMatrix containing the data as the diagonal pointing to the // original data, Can be used as an lvalue. Needs to be defined // after DiagMatrix. template inline SpecialMatrix, IsActive> Array::diag_matrix() { return SpecialMatrix, IsActive> (data_, storage_, dimensions_[0], offset_[0]-1); } template inline SpecialMatrix, IsActive> FixedArray::diag_matrix() { return SpecialMatrix, IsActive> (data_, 0, dimension_<0>::value, offset_<0>::value-1, internal::GradientIndex::get()); } } // End namespace adept #endif ================================================ FILE: include/adept/Stack.h ================================================ /* Stack.h -- Storage of automatic differentiation information Copyright (C) 2012-2014 University of Reading Copyright (C) 2015-2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
The Stack class is where all the derivative information of an algorithm, from which the Jacobian matrix can be constructed, as well as tangent-linear and adjoint operations being carried out for suitable input derivatives. When a Stack object is created it puts a pointer to itself in a global but thread-local variable that is then accessed whenever an active expression is evaluated. */ #ifndef AdeptStack_H #define AdeptStack_H 1 #include #include #include #include #include #include #include #include #include #ifdef ADEPT_STACK_STORAGE_STL #include #endif #include #include #include #include #include namespace adept { // --------------------------------------------------------------------- // Access to Stack object via global pointer // --------------------------------------------------------------------- // Declare a thread-safe and a thread-unsafe global pointer to the // current stack class Stack; extern ADEPT_THREAD_LOCAL Stack* _stack_current_thread; extern Stack* _stack_current_thread_unsafe; // Define ADEPT_ACTIVE_STACK to be the currently active version // regardless of whether we are in thread safe or unsafe mode #ifdef ADEPT_STACK_THREAD_UNSAFE #define ADEPT_ACTIVE_STACK adept::_stack_current_thread_unsafe #else #define ADEPT_ACTIVE_STACK adept::_stack_current_thread #endif // --------------------------------------------------------------------- // Helper classes // --------------------------------------------------------------------- // Structure holding a fixed-size array of objects (intended for // double or float) template struct Block { Block() { zero(); } const Type& operator[](uIndex i) const { return data[i]; } Type& operator[](uIndex i) { return data[i]; } void zero() { for (uIndex i = 0; i < Size; i++) data[i] = 0.0; } Type data[Size] ADEPT_SSE2_ALIGNED; }; // Structure for describing a gap in the current list of gradients struct Gap { Gap(uIndex value) : start(value), end(value) {} Gap(uIndex start_, uIndex end_) : start(start_), end(end_) {} 
uIndex start; uIndex end; }; // Forward declaration of Array, to enable Jacobian functions template class Array; // --------------------------------------------------------------------- // Definition of Stack class // --------------------------------------------------------------------- // "Stack" inherits from a class defining the storage of the stack // information, which is controlled by preprocessor // variables. Member functions not defined here are in Stack.cpp. class Stack #ifdef ADEPT_STACK_STORAGE_STL : public internal::StackStorageOrigStl #else : public internal::StackStorageOrig #endif { public: // ------------------------------------------------------------------- // Stack: 1. Static Definitions // ------------------------------------------------------------------- typedef std::list GapList; typedef std::list::iterator GapListIterator; // ------------------------------------------------------------------- // Stack: 2. Constructor and destructor // ------------------------------------------------------------------- // Only one constructor, which is normally called with no // arguments, but if "false" is provided as the argument it will // construct as normal but not attempt to make itself the active stack Stack(bool activate_immediately = true) : #ifndef ADEPT_STACK_STORAGE_STL gradient_(0), #endif most_recent_gap_(gap_list_.end()), i_gradient_(0), n_allocated_gradients_(0), max_gradient_(0), n_gradients_registered_(0), gradients_initialized_(false), #ifdef ADEPT_STACK_THREAD_UNSAFE is_thread_unsafe_(true), #else is_thread_unsafe_(false), #endif is_recording_(true), // Since the library might be compiled with OpenMP support and // subsequent programs without, we need to tell the library via // the following variable #ifdef _OPENMP have_openmp_(true), #else have_openmp_(false), #endif openmp_manually_disabled_(false) { initialize(ADEPT_INITIAL_STACK_LENGTH); new_recording(); if (activate_immediately) { activate(); } } // Destructor ~Stack(); // 
------------------------------------------------------------------- // Stack: 3. Public member functions // ------------------------------------------------------------------- // This function is no longer available void start(uIndex n = ADEPT_INITIAL_STACK_LENGTH) { throw feature_not_available("The Stack::start() function has been removed since Adept version 1.0: see the documentation about how to use Stack::new_recording()" ADEPT_EXCEPTION_LOCATION); } // After a sequence of operation pushes, we may append these to // the previous statement by calling this function. // gradient_index is the index of the gradient on the LHS of the // statement: if this does not match the LHS of the previous // statement then this is an error and "false" will be returned. A // "true" return value indicates success. bool update_lhs(const uIndex& gradient_index) { if (statement_[n_statements_-1].index != gradient_index) { return false; } else { statement_[n_statements_-1].end_plus_one = n_operations_; return true; } } // When an aReal object is created it is registered on the stack // and keeps a copy of its location, which is returned from this // function uIndex register_gradient() { uIndex return_val; #ifdef ADEPT_RECORDING_PAUSABLE if (is_recording()) { #endif n_gradients_registered_++; if (gap_list_.empty()) { // Add to end of gradient vector i_gradient_++; if (i_gradient_ > max_gradient_) { max_gradient_ = i_gradient_; } return_val = i_gradient_-1; } else { // Insert in a gap Gap& first_gap = gap_list_.front(); return_val = first_gap.start; first_gap.start++; if (first_gap.start > first_gap.end) { // Gap has closed: remove it from the list, after checking // if it had been stored as the gap that had most recently // grown if (most_recent_gap_ == gap_list_.begin()) { most_recent_gap_ = gap_list_.end(); } gap_list_.pop_front(); } } #ifdef ADEPT_RECORDING_PAUSABLE } else { return_val = 0; } #endif return return_val; } // Register n gradients and return the index of the first one 
uIndex register_gradients(const uIndex& n) { uIndex return_val; #ifdef ADEPT_RECORDING_PAUSABLE if (is_recording()) { #endif return_val = do_register_gradients(n); #ifdef ADEPT_RECORDING_PAUSABLE } else { return_val = 0; } #endif return return_val; } // When an aReal object is destroyed it is unregistered from the // stack. If it is at the top of the stack then the stack pointer // can be decremented so that the space can be used by another // object. A gap can appear in the stack if an active object (or // array of active objects) is returned from a function, so we // need to keep track of a "gap" appearing in the stack. If the // user uses new and delete without any regard for this "last-in // first-out" preference then the number of gradients that are // allocated in the reverse pass may be larger than needed. void unregister_gradient(const uIndex& gradient_index) { n_gradients_registered_--; if (gradient_index+1 == i_gradient_) { // Gradient to be unregistered is at the top of the stack i_gradient_--; if (!gap_list_.empty()) { Gap& last_gap = gap_list_.back(); if (i_gradient_ == last_gap.end+1) { // We have unregistered the elements between the "gap" of // unregistered element and the top of the stack, so can // set the variables indicating the presence of the gap to // zero i_gradient_ = last_gap.start; GapListIterator it = gap_list_.end(); it--; if (most_recent_gap_ == it) { most_recent_gap_ = gap_list_.end(); } gap_list_.pop_back(); } } } else { // Gradient to be unregistered not at top of stack. 
// In the less common situation that the gradient is not at // the top of the stack, the task of unregistering is a bit // more involved, so we carry it out in a non-inline function // to avoid code bloat unregister_gradient_not_top(gradient_index); } } // Unregister n gradients starting at gradient_index void unregister_gradients(const uIndex& gradient_index, const uIndex& n); protected: uIndex do_register_gradients(const uIndex& n); // Unregister a gradient that is not at the top of the stack void unregister_gradient_not_top(const uIndex& gradient_index); public: // Set the gradients in the list with indices between start and // end_plus_one-1 to the values pointed to by "gradient" template typename internal::enable_if::value, void>::type set_gradients(uIndex start, uIndex end_plus_one, const MyReal* gradient) { // Need to initialize the gradient list if not already done if (!gradients_are_initialized()) { initialize_gradients(); } if (end_plus_one > max_gradient_) { throw gradient_out_of_range(); } for (uIndex i = start, j = 0; i < end_plus_one; i++, j++) { gradient_[i] = gradient[j]; } } template typename internal::enable_if::value, void>::type set_gradients(uIndex start, uIndex end_plus_one, const MyReal* gradient, Index src_stride, Index target_stride) { // Need to initialize the gradient list if not already done if (!gradients_are_initialized()) { initialize_gradients(); } if (end_plus_one > max_gradient_) { throw gradient_out_of_range(); } for (uIndex i = start, j = 0; i < end_plus_one; i+=target_stride, j+=src_stride) { gradient_[i] = gradient[j]; } } // Get the gradients in the list with indices between start and // end_plus_one-1 and put them in the location pointed to by // "gradient" template typename internal::enable_if::value, void>::type get_gradients(uIndex start, uIndex end_plus_one, MyReal* gradient) const { if (!gradients_are_initialized()) { throw gradients_not_initialized(); } if (end_plus_one > max_gradient_) { throw gradient_out_of_range(); 
} for (uIndex i = start, j = 0; i < end_plus_one; i++, j++) { gradient[j] = gradient_[i]; } } template typename internal::enable_if::value, void>::type get_gradients(uIndex start, uIndex end_plus_one, MyReal* gradient, Index src_stride, Index target_stride) const { if (!gradients_are_initialized()) { throw gradients_not_initialized(); } if (end_plus_one > max_gradient_) { throw gradient_out_of_range(); } for (uIndex i = start, j = 0; i < end_plus_one; i+=src_stride, j+=target_stride) { gradient[j] = gradient_[i]; } } // Run the tangent-linear algorithm on the gradient list; normally // this call is preceded calls to set_gradient to load input // gradients and followed by calls to get_gradient to extract // gradients void compute_tangent_linear(); void forward() { return compute_tangent_linear(); } // Run the adjoint algorithm on the gradient list; normally this // call is preceded calls to set_gradient to load input gradient // and followed by calls to get_gradient to extract gradient void compute_adjoint(); void reverse() { return compute_adjoint(); } // Return the number of independent and dependent variables that // have been identified uIndex n_independent() const { return static_cast(independent_index_.size()); } uIndex n_dependent() const { return static_cast(dependent_index_.size()); } // Compute the Jacobian matrix; note that jacobian_out must be // allocated to be of size m*n, where m is the number of dependent // variables and n is the number of independents. The independents // and dependents must have already been identified with the // functions "independent" and "dependent", otherwise this // function will throw a // "dependents_or_independents_not_identified" exception. The // optional dep_offset and indep_offset specify the offsets in // memory of the dependent and independent variables, // respectively, where 0 indicates to use the size of the other // dimension. The default is dep_offset=1, i.e. 
the dependents // vary contiguously in memory which is equivalent to the Jacobian // being stored in column-major order. Unfortunately this is not // the same as the convention for Adept arrays, but this part of // the interface was designed in Adept 1 before arrays were added. void jacobian(Real* jacobian_out, Index dep_offset = 1, Index indep_offset = 0) const { // Call one of jacobian_forward and jacobian_reverse, whichever // would be faster. if (n_independent() <= n_dependent()) { jacobian_forward(jacobian_out, dep_offset, indep_offset); } else { jacobian_reverse(jacobian_out, dep_offset, indep_offset); } }; // Compute the Jacobian matrix, but explicitly specify whether // this is done with repeated forward or reverse passes. void jacobian_forward(Real* jacobian_out, Index dep_offset = 1, Index indep_offset = 0) const; void jacobian_reverse(Real* jacobian_out, Index dep_offset = 1, Index indep_offset = 0) const; // If the user included "adept_arrays.h" rather than "adept.h", // then allow the Jacobian to be returned in the form of an Adept // matrix. void jacobian(Array<2,Real,false> jac) const; void jacobian_forward(Array<2,Real,false> jac) const; void jacobian_reverse(Array<2,Real,false> jac) const; Array<2,Real,false> jacobian() const; Array<2,Real,false> jacobian_forward() const; Array<2,Real,false> jacobian_reverse() const; // Return maximum number of OpenMP threads to be used in Jacobian // calculation int max_jacobian_threads() const; // Set the maximum number of threads to be used in Jacobian // calculations, if possible. A value of 1 indicates that OpenMP // will not be used, while a value of 0 indicates that the number // will match the number of available processors. Returns the // maximum that will be used, which will be 1 if the Adept library // was compiled without OpenMP support. Note that a value of 1 // will disable the use of OpenMP with Adept, so Adept will then // use no OpenMP directives or function calls. 
Note that if in // your program you use OpenMP with each thread performing // automatic differentiaion with its own independent Adept stack, // then typically only one OpenMP thread is available for each // Jacobian calculation, regardless of whether you call this // function. int set_max_jacobian_threads(int n); // In order to compute the jacobian we need to first declare which // active variables are independent (x) and which are dependent // (y). First, the following two functions declare an individual // active variable and an array of active variables to be // independent. Note that we use templates here because aReal has // not been defined. template void independent(const A& x) { // independent_index_.push_back(x.gradient_index()); x.push_gradient_indices(independent_index_); } template void independent(const A* x, uIndex n) { for (uIndex i = 0; i < n; i++) { // independent_index_.push_back(x[i].gradient_index()); x[i].push_gradient_indices(independent_index_); } } // Likewise, delcare the dependent variables template void dependent(const A& x) { // dependent_index_.push_back(x.gradient_index()); x.push_gradient_indices(dependent_index_); } template void dependent(const A* x, uIndex n) { for (uIndex i = 0; i < n; i++) { // dependent_index_.push_back(x[i].gradient_index()); x[i].push_gradient_indices(dependent_index_); } } // Print various bits of information about the Stack to the // specified stream (or standard output if not specified). The // same behaviour can be obtained by "<<"-ing the Stack to a // stream. 
void print_status(std::ostream& os = std::cout) const; // Print each derivative statement to the specified stream (or // standard output if not specified) void print_statements(std::ostream& os = std::cout) const; // Print the current gradient list to the specified stream (or // standard output if not specified); returns true on success or // false if no gradients have been initialized bool print_gradients(std::ostream& os = std::cout) const; // Print a list of the gaps in the gradient list void print_gaps(std::ostream& os = std::cout) const; // Clear the gradient list enabling a new adjoint or // tangent-linear computation to be performed with the same // recording void clear_gradients() { gradients_initialized_ = false; } // Clear the list of independent variables, in order that a // different Jacobian can be computed from the same recording void clear_independents() { independent_index_.clear(); } // Clear the list of dependent variables, in order that a // different Jacobian can be computed from the same recording void clear_dependents() { dependent_index_.clear(); } // Function now removed void clear() { throw feature_not_available("The Stack::clear() function has been removed since Adept version 1.0: see the documentation about how to use Stack::new_recording()" ADEPT_EXCEPTION_LOCATION); } // Function now removed void clear_statements() { throw feature_not_available("The Stack::clear_statements() function has been removed since Adept version 1.0: see the documentation about how to use Stack::new_recording()" ADEPT_EXCEPTION_LOCATION); } // Make this stack "active" by copying its "this" pointer to a // global variable; this makes it the stack that aReal objects // subsequently interact with when being created and participating // in mathematical expressions void activate(); // This stack will stop being the one that aReal objects refer // to; this may be useful if the thread needs to use another stack // object for the next algorithm void deactivate() { if 
(is_active()) { ADEPT_ACTIVE_STACK = 0; } } // Return true if the Stack is "active", false otherwise bool is_active() const { return (ADEPT_ACTIVE_STACK == this); } // Clear the contents of the various lists ready for a new // recording void new_recording() { clear_stack(); // Defined in the storage class clear_independents(); clear_dependents(); clear_gradients(); // i_gradient_ is the maximum index of all currently constructed // aReal objects and max_gradient_ is the maximum index of all // that were used in a recording. Thus when deleting the // recording we need to set max_gradient_ to i_gradient_ or a // little more. max_gradient_ = i_gradient_+1; // Insert a null statement // std::cerr << "Inserting a null statement; when is this needed?\n"; push_lhs(-1); } // Are gradients to be computed? The default is "true", but if // ADEPT_RECORDING_PAUSABLE is defined then this may // be false bool is_recording() const { #ifdef ADEPT_RECORDING_PAUSABLE return is_recording_; #else return true; #endif } // Stop recording gradient information, enabling a piece of active // code to be run without the stack information being stored. This // only works if ADEPT_RECORDING_PAUSABLE has been defined. bool pause_recording() { #ifdef ADEPT_RECORDING_PAUSABLE is_recording_ = false; return true; #else return false; #endif } // Continue recording gradient information after a previous // pause_recording() call. This only works if // ADEPT_RECORDING_PAUSABLE has been defined. bool continue_recording() { #ifdef ADEPT_RECORDING_PAUSABLE is_recording_ = true; return true; #else return false; #endif } // For modular codes, some modules may have an existing Jacobian // code and possibly be unsuitable for automatic differentiation // using Adept (e.g. because they are written in Fortran). In // this case, we can use the following two functions to "wrap" the // non-Adept code. 
These are actually normally called by functions // of the same name in the Active, ActiveReference and // ActiveConstReference classes. void add_derivative_dependence(uIndex lhs_index, uIndex rhs_index, Real multiplier) { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for 1 entry ADEPT_ACTIVE_STACK->check_space(1); #endif if (multiplier != 0.0) { push_rhs(multiplier, rhs_index); } push_lhs(lhs_index); #ifdef ADEPT_RECORDING_PAUSABLE } #endif } void append_derivative_dependence(uIndex lhs_index, uIndex rhs_index, Real multiplier) { #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for 1 entry ADEPT_ACTIVE_STACK->check_space(1); #endif if (multiplier != 0.0) { push_rhs(multiplier, rhs_index); } if (!update_lhs(lhs_index)) { throw wrong_gradient("Wrong gradient: append_derivative_dependence called on a different active number from the most recent add_derivative_dependence call" ADEPT_EXCEPTION_LOCATION); } #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // To enable the automatic differentiation of matrix // multiplication, this function performs a similar role to // aReal::add_derivative_dependence. We add a derivative // expression of the form d[lhs_index] = // sum(multiplier[i*multiplier_stride]*d[rhs_index+i*index_stride]), // where the summation is from i = 0 to n-1. Multiple calls to // this function may be carried out but must be followed by // push_lhs(lhs_index) to specify the left-hand-side of the // statement. 
template void push_derivative_dependence(uIndex rhs_index, const Type* multiplier, int n = 1, int index_stride = 1, int multiplier_stride = 1) { #ifdef ADEPT_RECORDING_PAUSABLE if (is_recording()) { #endif #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION // Check there is space in the operation stack for n entries check_space(n); #endif for (int i = 0; i < n; i++, rhs_index += index_stride, multiplier += multiplier_stride) { push_rhs(*multiplier, rhs_index); } #ifdef ADEPT_RECORDING_PAUSABLE } #endif } // Have the gradients been initialized? bool gradients_are_initialized() const { return gradients_initialized_; } // Return the number of statements, operations, and how much // memory has been allocated for each uIndex n_statements() const { return n_statements_; } uIndex n_allocated_statements() const { return n_allocated_statements_; } uIndex n_operations() const { return n_operations_; } uIndex n_allocated_operations() const { return n_allocated_operations_; } // Return the size of the two dimensions of a Jacobian matrix uIndex n_independents() const { return static_cast(independent_index_.size()); } uIndex n_dependents() const { return static_cast(dependent_index_.size()); } // Return the maximum number of gradients required to perform // adjoint calculation uIndex max_gradients() const { return max_gradient_; } // Return the highest gradient index on the left-hand-side of any // of the statements currently on the stack uIndex max_gradient_index() const { uIndex mg = 0; for (int is = 0; is < n_statements_; ++is) { if (statement_[is].index > mg) { mg = statement_[is].index; } } return mg; } // Return the index to the current gradient uIndex i_gradient() const { return i_gradient_; } // Return the number of gradients memory has been allocated for uIndex n_allocated_gradients() const { return n_allocated_gradients_; } // Return the number of bytes used std::size_t memory() const { std::size_t mem = n_statements()*sizeof(uIndex)*2 + 
n_operations()*(sizeof(Real)+sizeof(uIndex)); if (gradients_are_initialized()) { mem += max_gradients()*sizeof(Real); } return mem; } // Return the number of gradients currently registered uIndex n_gradients_registered() const { return n_gradients_registered_; } // Return the fraction of multipliers equal to the specified // number (usually -1, 0 or 1) Real fraction_multipliers_equal_to(Real val) { uIndex sum = 0; for (uIndex i = 0; i < n_operations_; i++) { if (multiplier_[i] == val) { sum++; } } return static_cast(sum)/static_cast(n_operations_); } bool is_thread_unsafe() const { return is_thread_unsafe_; } const GapList& gap_list() const { return gap_list_; } // Memory to store statements and operations can be preallocated, // offering modest performance advantage if you define // ADEPT_MANUAL_MEMORY_ALLOCATION and know the maximum number of // statements and operations you will need void preallocate_statements(uIndex n) { if (n_statements_+n+1 >= n_allocated_statements_) { grow_statement_stack(n); } } void preallocate_operations(uIndex n) { if (n_allocated_operations_ < n_operations_+n+1) { grow_operation_stack(n); } } // ------------------------------------------------------------------- // Stack: 4. 
Protected member functions // ------------------------------------------------------------------- protected: // Initialize the vector of gradients ready for the adjoint // calculation void initialize_gradients(); // Set to zero the gradients required by a Jacobian calculation /* void zero_gradient_multipass() { for (std::size_t i = 0; i < gradient_multipass_.size(); i++) { gradient_multipass_[i].zero(); } } */ // OpenMP versions of the forward and reverse Jacobian functions, // which are called from the jacobian_forward and jacobian_reverse // if OpenMP is enabled void jacobian_forward_openmp(Real* jacobian_out, Index dep_offset, Index indep_offset) const; void jacobian_reverse_openmp(Real* jacobian_out, Index dep_offset, Index indep_offset) const; // The core code for computing Jacobians, used in both OpenMP and // non-OpenMP versions void jacobian_forward_kernel(Real* __restrict gradient_multipass_b) const; void jacobian_forward_kernel_packet(Real* __restrict gradient_multipass_b) const; void jacobian_forward_kernel_extra(Real* __restrict gradient_multipass_b, uIndex) const; void jacobian_reverse_kernel(Real* __restrict gradient_multipass_b) const; void jacobian_reverse_kernel_packet(Real* __restrict gradient_multipass_b) const; void jacobian_reverse_kernel_extra(Real* __restrict gradient_multipass_b, uIndex) const; // ------------------------------------------------------------------- // Stack: 5. 
Data // ------------------------------------------------------------------- protected: #ifdef ADEPT_STACK_STORAGE_STL // Data are stored using standard template library containers // std::valarray gradient_; std::vector gradient_; #else // Data are stored as dynamically allocated arrays Real* __restrict gradient_; #endif // For Jacobians we process multiple rows/columns at once so need // what is essentially a 2D array // std::vector > gradient_multipass_; // uIndexs of the independent and dependent variables std::vector independent_index_; std::vector dependent_index_; // Keep a record of gaps in the gradient array to ensure that gaps // are filled GapList gap_list_; // Gap* most_recent_gap_; GapListIterator most_recent_gap_; uIndex i_gradient_; // Current number of gradients uIndex n_allocated_gradients_; // Number of allocated gradients uIndex max_gradient_; // Max number of gradients to store uIndex n_gradients_registered_; // Number of gradients registered bool gradients_initialized_; // Have the gradients been // initialized? bool is_thread_unsafe_; bool is_recording_; bool have_openmp_; // true if this header file // compiled with -fopenmp bool openmp_manually_disabled_; // true if user called // set_max_jacobian_threads(1) }; // End of Stack class // ------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------- // Sending a Stack object to a stream reports information about the // stack inline std::ostream& operator<<(std::ostream& os, const adept::Stack& stack) { stack.print_status(os); return os; } // Memory to store statements and operations can be preallocated, // offering modest performance advantage if you define // ADEPT_MANUAL_MEMORY_ALLOCATION and know the maximum number of // statements and operations you will need. This version is useful // in functions that don't have visible access to the currently // active Adept stack. 
inline void preallocate_statements(uIndex n) { ADEPT_ACTIVE_STACK->preallocate_statements(n); } inline void preallocate_operations(uIndex n) { ADEPT_ACTIVE_STACK->preallocate_operations(n); } // Returns a pointer to the currently active stack (or 0 if there is none) inline Stack* active_stack() { return ADEPT_ACTIVE_STACK; } // Return whether the active stack is stored in a global variable // (thread unsafe) rather than a thread-local global variable // (thread safe) #ifdef ADEPT_STACK_THREAD_UNSAFE inline bool is_thread_unsafe() { return true; } #else inline bool is_thread_unsafe() { return false; } #endif // Subsequent code should use adept::active_stack rather than this // preprocessor macro //#undef ADEPT_ACTIVE_STACK } // End of namespace adept #endif ================================================ FILE: include/adept/StackStorage.h ================================================ /* StackStorage.h -- Storage of statement & operation stacks Copyright (C) 2012-2014 University of Reading Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. The Stack class inherits from a class providing the storage (and interface to the storage) for the derivative statements that are accumulated during the execution of an algorithm. The derivative statements are held in two stacks described by Hogan (2014): the "statement stack" and the "operation stack". This file provides the stack storage engine: blocks of dynamically allocated arrays. 
*/ #ifndef AdeptStackStorage_H #define AdeptStackStorage_H 1 #include #include #include namespace adept { namespace internal { // Helper classes struct StatementBlock { StatementBlock(uIndex n_) : n(0), n_allocated(n_) { data = new Statement[n_]; } ~StatementBlock() { delete [] data; } // Data Statement* data; uIndex n; const uIndex n_allocated; }; struct OperationBlock { StatementBlock(uIndex n_) : n(0), n_allocated(n_) { multiplier = new Real[n_]; index = new uIndex[n_]; } ~StatementBlock() { delete [] multiplier; delete [] index; } // Data Real* multiplier; uIndex* index; uIndex n; uIndex n_allocated }; std::vector stack_block_; struct StackBlock { StatementBlock* statement_list; OperationBlock* operation_list; uIndex statement_start; uIndex statement_end; }; std::vector statement_data_; std::vector operation_data_; class StackStorage { public: // Constructor StackStorage() : statement_(0), multiplier_(0), index_(0), n_statements_(0), n_allocated_statements_(0), n_operations_(0), n_allocated_operations_(0) { } // Destructor ~StackStorage(); // Push an operation (i.e. a multiplier-gradient pair) on to the // stack. We assume here that check_space() as been called before // so there is enough space to hold these elements. 
void push_rhs(const Real& multiplier, const uIndex& gradient_index) { #ifdef ADEPT_REMOVE_NULL_STATEMENTS // If multiplier==0 then the resulting statement would have no // effect so we can speed up the subsequent adjoint/jacobian // calculations (at the expense of making this critical part // of the code slower) if (multiplier != 0.0) { #endif multiplier_[n_operations_] = multiplier; index_[n_operations_++] = gradient_index; #ifdef ADEPT_TRACK_NON_FINITE_GRADIENTS if (!std::isfinite(multiplier) || std::isinf(multiplier)) { throw non_finite_gradient(); } #endif #ifdef ADEPT_REMOVE_NULL_STATEMENTS } #endif } // Push a statement on to the stack: this is done after a // sequence of operation pushes; gradient_index is the index of // the gradient on the LHS of the expression, while the // "end_plus_one" element is simply the current length of the // operation list void push_lhs(const uIndex& gradient_index) { #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION if (n_statements_ >= n_allocated_statements_) { grow_statement_stack(); } #endif statement_[n_statements_].index = gradient_index; statement_[n_statements_++].end_plus_one = n_operations_; } // Push n left-hand-sides of differential expressions on to the // stack with no corresponding right-hand-side, appropriate if // an array of active variables contiguous in memory (or // separated by a fixed stride) has been assigned to inactive // numbers. 
void push_lhs_range(const uIndex& first, const uIndex& n, const uIndex& stride = 1) { uIndex last_plus_1 = first+n*stride; #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION if (n_statements_+n > n_allocated_statements_) { grow_statement_stack(n); } #endif for (uIndex i = first; i < last_plus_1; i += stride) { statement_[n_statements_].index = i; statement_[n_statements_++].end_plus_one = n_operations_; } } // Check whether the operation stack contains enough space for n // new operations; if not, grow it void check_space(const uIndex& n) { if (n_allocated_operations_ < n_operations_+n+1) { grow_operation_stack(n); } } template void check_space_static() { check_space(n); } protected: // Called by new_recording() void clear_stack() { // Set the recording indices to zero n_operations_ = 0; n_statements_ = 0; } // This function is called by the constructor to initialize // memory, which can be grown subsequently void initialize(uIndex n) { multiplier_ = new Real[n]; index_ = new uIndex[n]; n_allocated_operations_ = n; statement_ = new Statement[n]; n_allocated_statements_ = n; } // Grow the capacity of the operation or statement stacks to // hold a minimum of "min" elements. If min=0 then the stacks // are doubled in size. 
void grow_operation_stack(uIndex min = 0); void grow_statement_stack(uIndex min = 0); protected: // Data are stored as dynamically allocated arrays // The "statement stack" is held as a single array Statement* __restrict statement_ ; // The "operation stack" is held as two arrays Real* __restrict multiplier_; uIndex* __restrict index_; uIndex n_statements_; // Number of statements uIndex n_allocated_statements_; // Space allocated for statements uIndex n_operations_; // Number of operations uIndex n_allocated_operations_; // Space allocated for statements }; } // End namespace internal } // End namespace adept #endif ================================================ FILE: include/adept/StackStorageOrig.h ================================================ /* StackStorageOrig.h -- Original method to store statement & operation stacks Copyright (C) 2014-2015 University of Reading Author: Robin Hogan This file is part of the Adept library. The Stack class inherits from a class providing the storage (and interface to the storage) for the derivative statements that are accumulated during the execution of an algorithm. The derivative statements are held in two stacks described by Hogan (2014): the "statement stack" and the "operation stack". This file provides the original storage engine: dynamically allocated arrays with the two stacks resulting from an entire algorithm being contiguous in memory. This is not ideal for very large algorithms. */ #ifndef AdeptStackStorageOrig_H #define AdeptStackStorageOrig_H 1 #include #include #include namespace adept { namespace internal { class StackStorageOrig { public: // Constructor StackStorageOrig() : statement_(0), multiplier_(0), index_(0), n_statements_(0), n_allocated_statements_(0), n_operations_(0), n_allocated_operations_(0) { } // Destructor ~StackStorageOrig(); // Push an operation (i.e. a multiplier-gradient pair) on to the // stack. 
We assume here that check_space() as been called before // so there is enough space to hold these elements. void push_rhs(const Real& multiplier, const uIndex& gradient_index) { #ifdef ADEPT_REMOVE_NULL_STATEMENTS // If multiplier==0 then the resulting statement would have no // effect so we can speed up the subsequent adjoint/jacobian // calculations (at the expense of making this critical part // of the code slower) if (multiplier != 0.0) { #endif multiplier_[n_operations_] = multiplier; index_[n_operations_++] = gradient_index; #ifdef ADEPT_TRACK_NON_FINITE_GRADIENTS if (!std::isfinite(multiplier) || std::isinf(multiplier)) { throw non_finite_gradient(); } #endif #ifdef ADEPT_REMOVE_NULL_STATEMENTS } #endif } // Push the gradient indices of a vectorized operation on to the // stack. We assume here that check_space() as been called // before so there is enough space to hold these elements. The // multipliers will be added later. template void push_rhs_indices(const uIndex& gradient_index) { for (Index i = 0; i < Num; ++i) { index_[n_operations_+i*Stride] = gradient_index+i; } ++n_operations_; } // Push a statement on to the stack: this is done after a // sequence of operation pushes; gradient_index is the index of // the gradient on the LHS of the expression, while the // "end_plus_one" element is simply the current length of the // operation list void push_lhs(const uIndex& gradient_index) { #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION if (n_statements_ >= n_allocated_statements_) { grow_statement_stack(); } #endif statement_[n_statements_].index = gradient_index; statement_[n_statements_++].end_plus_one = n_operations_; } // Push n left-hand-sides of differential expressions on to the // stack with no corresponding right-hand-side, appropriate if // an array of active variables contiguous in memory (or // separated by a fixed stride) has been assigned to inactive // numbers. 
Note that the second and third arguments must not be // references, since they may be compile-time constants for // FixedArray objects. void push_lhs_range(const uIndex& first, uIndex n, uIndex stride = 1) { uIndex last_plus_1 = first+n*stride; #ifndef ADEPT_MANUAL_MEMORY_ALLOCATION if (n_statements_+n > n_allocated_statements_) { grow_statement_stack(n); } #endif for (uIndex i = first; i < last_plus_1; i += stride) { statement_[n_statements_].index = i; statement_[n_statements_++].end_plus_one = n_operations_; } } // Check whether the operation stack contains enough space for n // new operations; if not, grow it void check_space(uIndex n) { if (n_allocated_operations_ < n_operations_+n+1) { grow_operation_stack(n); } } template void check_space_static() { check_space(n); } protected: // Called by new_recording() void clear_stack() { // Set the recording indices to zero n_operations_ = 0; n_statements_ = 0; } // This function is called by the constructor to initialize // memory, which can be grown subsequently void initialize(uIndex n) { multiplier_ = new Real[n]; index_ = new uIndex[n]; n_allocated_operations_ = n; statement_ = new Statement[n]; n_allocated_statements_ = n; } // Grow the capacity of the operation or statement stacks to // hold a minimum of "min" elements. If min=0 then the stacks // are doubled in size. 
void grow_operation_stack(uIndex min = 0); void grow_statement_stack(uIndex min = 0); protected: // Data are stored as dynamically allocated arrays // The "statement stack" is held as a single array Statement* __restrict statement_ ; // The "operation stack" is held as two arrays Real* __restrict multiplier_; uIndex* __restrict index_; uIndex n_statements_; // Number of statements uIndex n_allocated_statements_; // Space allocated for statements uIndex n_operations_; // Number of operations uIndex n_allocated_operations_; // Space allocated for operations }; } // End namespace internal } // End namespace adept #endif ================================================ FILE: include/adept/StackStorageOrigStl.h ================================================ /* StackStorageOrigStl.h -- Original storage of stacks using STL containers Copyright (C) 2014-2015 University of Reading Author: Robin Hogan This file is part of the Adept library. The Stack class inherits from a class providing the storage (and interface to the storage) for the derivative statements that are accumulated during the execution of an algorithm. The derivative statements are held in two stacks described by Hogan (2014): the "statement stack" and the "operation stack". This file provides one of the original storage engines, which used std::vector to hold the two stacks. Note that these stacks are contiguous in memory, which is not ideal for very large algorithms. */ #ifndef AdeptStackStorageOrigStl_H #define AdeptStackStorageOrigStl_H 1 #include #include #include namespace adept { namespace internal { class StackStorageOrigStl { public: // Constructor StackStorageOrigStl() : n_statements_(0), n_allocated_statements_(0), n_operations_(0), n_allocated_operations_(0) { } // Destructor (does nothing) ~StackStorageOrigStl() { }; // Push an operation (i.e. a multiplier-gradient pair) on to the // stack. We assume here that check_space() has been called before // so there is enough space to hold these elements.
void push_rhs(const Real& multiplier, const uIndex& gradient_index) { #ifdef ADEPT_REMOVE_NULL_STATEMENTS // If multiplier==0 then the resulting statement would have no // effect so we can speed up the subsequent adjoint/jacobian // calculations (at the expense of making this critical part // of the code slower) if (multiplier != 0.0) { #endif multiplier_.push_back(multiplier); index_.push_back(gradient_index); n_operations_++; #ifdef ADEPT_TRACK_NON_FINITE_GRADIENTS if (!std::isfinite(multiplier) || std::isinf(multiplier)) { throw non_finite_gradient(); } #endif #ifdef ADEPT_REMOVE_NULL_STATEMENTS } #endif } // Push a statement on to the stack: this is done after a // sequence of operation pushes; gradient_index is the index of // the gradient on the LHS of the expression, while the // "end_plus_one" element is simply the current length of the // operation list void push_lhs(const uIndex& gradient_index) { statement_.push_back(Statement(gradient_index, n_operations_)); n_statements_++; } // Push n left-hand-sides of differential expressions on to the // stack with no corresponding right-hand-side, appropriate if // an array of active variables contiguous in memory (or // separated by a fixed stride) has been assigned to inactive // numbers. 
void push_lhs_range(const uIndex& first, const uIndex& n, const uIndex& stride = 1) { uIndex last_plus_1 = first+n*stride; for (uIndex i = first; i < last_plus_1; i += stride) { statement_.push_back(Statement(i, n_operations_)); } n_statements_ += n; } // Check whether the operation stack contains enough space for n // new operations; for STL containers this does nothing void check_space(const uIndex& n) { } template void check_space_static() { } protected: // Called by new_recording() void clear_stack() { // If we use STL containers then the clear() function sets their // size to zero but leaves the memory allocated statement_.clear(); multiplier_.clear(); index_.clear(); // Set the recording indices to zero n_operations_ = 0; n_statements_ = 0; } // This function is called by the constructor to initialize // memory, which can be grown subsequently void initialize(uIndex n) { statement_.reserve(n); multiplier_.reserve(n); index_.reserve(n); } // Grow the capacity of the operation or statement stacks to // hold a minimum of "min" elements. If min=0 then the stacks // are doubled in size. 
void grow_operation_stack(uIndex min = 0); void grow_statement_stack(uIndex min = 0); protected: // Data are stored using standard template library containers // The "statement stack" is held as a single array std::vector statement_; // The "operation stack" is held as two arrays std::vector multiplier_; std::vector index_; uIndex n_statements_; // Number of statements uIndex n_allocated_statements_; // Space allocated for statements uIndex n_operations_; // Number of operations uIndex n_allocated_operations_; // Space allocated for operations }; } // End namespace internal } // End namespace adept #endif ================================================ FILE: include/adept/Statement.h ================================================ /* Statement.h -- Original method to store statement & operation stacks Copyright (C) 2012-2014 University of Reading Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptStatement_H #define AdeptStatement_H 1 #include namespace adept { namespace internal { // Structure describing the LHS of a derivative expression. For dx // = z dy + y dz, "index" would be the location of dx in the // gradient list, and "end_plus_one" would be one plus the location // of the final operation (multiplier-derivative pair) on the RHS, // in this case y dz. struct Statement { Statement() { } Statement(uIndex index_, uIndex end_plus_one_) : index(index_), end_plus_one(end_plus_one_) { } uIndex index; uIndex end_plus_one; }; } } #endif ================================================ FILE: include/adept/Storage.h ================================================ /* Storage.h -- store array of active or inactive data Copyright (C) 2012-2014 University of Reading Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library.
The Storage class manages the data underlying array objects, and uses a model of reference counting so that multiple objects can refer to the same data. This enables arrays that are actually subsets of another array to be treated as normal array objects. */ #ifndef AdeptStorage_H #define AdeptStorage_H 1 #include #include #include #include #include #include #include #include #include #ifdef ADEPT_STORAGE_THREAD_SAFE #include #endif namespace adept { // ------------------------------------------------------------------- // Global variables // ------------------------------------------------------------------- namespace internal { // To check for memory leaks, we keep a running total of the number // of Storage objects that are created and destroyed extern Index n_storage_objects_created_; extern Index n_storage_objects_deleted_; } // ------------------------------------------------------------------- // Definition of Storage class // ------------------------------------------------------------------- template class Storage { public: // ------------------------------------------------------------------- // Storage: 1. Constructors and destructor // ------------------------------------------------------------------- // The only way to construct this object is by passing it an // integer indicating the size, and optionally a boolean saying // whether the data are active, in which case n gradients are // registered with the active stack and the index returned is // stored in gradient_index_ (which is otherwise left at -1). Storage(Index n, bool IsActive = false) : n_(n), n_links_(1), gradient_index_(-1) { data_ = internal::alloc_aligned(n); #ifdef ADEPT_INIT_REAL initialize(); #endif internal::n_storage_objects_created_++; #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION if (IsActive) { gradient_index_ = ADEPT_ACTIVE_STACK->register_gradients(n); } #endif } protected: // Only allow the class to destroy itself by putting in // "protected". FIX - would be better to start valid // gradient_index at 1, so 0 is reserved for invalid values.
~Storage() { internal::free_aligned(data_); #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION #ifdef ADEPT_RECORDING_PAUSABLE if (ADEPT_ACTIVE_STACK->is_recording()) { #endif if (gradient_index_ >= 0) { ADEPT_ACTIVE_STACK->unregister_gradients(gradient_index_, n_); } #ifdef ADEPT_RECORDING_PAUSABLE } #endif #endif internal::n_storage_objects_deleted_++; } // Null initialization, copy and assignment methods that are // "protected" to prevent them being used Storage() { } Storage(Storage& storage) { }; void operator=(Storage& storage) { }; #ifdef ADEPT_INIT_REAL // Initialize to zero, NaN or whatever for debugging template typename internal::enable_if::value, void>::type initialize() { for (int i = 0; i < n_; ++i) { data_[i] = ADEPT_INIT_REAL; } } template typename internal::enable_if::value, void>::type initialize() { for (int i = 0; i < n_; ++i) { #ifdef ADEPT_INIT_REAL_SNAN data_[i] = std::complex( std::numeric_limits::signaling_NaN(), std::numeric_limits::signaling_NaN()); #else data_[i] = std::complex(ADEPT_INIT_REAL, ADEPT_INIT_REAL); #endif } } // Dummy initialize for non-floats template typename internal::enable_if::value && !internal::is_complex::value, void>::type initialize() { } #endif // ------------------------------------------------------------------- // Storage: 2. 
Public member functions // ------------------------------------------------------------------- public: // Add link to an existing storage object void add_link() { n_links_++; } // Remove link as follows; this is only safe in a multi-threaded // environment if ADEPT_STORAGE_THREAD_SAFE is defined, making // n_links_ atomic void remove_link() { if (n_links_ == 0) { throw invalid_operation("Attempt to remove more links to a storage object than set" ADEPT_EXCEPTION_LOCATION); } else if (--n_links_ == 0) { delete this; } } // Return the number of elements allocated Index n_allocated() const { return n_; } // Return the number of links to an object int n_links() const { return n_links_; } Index gradient_index() const { return gradient_index_; } // Return pointer to the start of the data Type* data() { return data_; } const Type* data() const { return data_; } // Return a string of information std::string info_string() const { std::stringstream x; x << n_ << " " << sizeof(Type) << "-byte elements allocated with " << n_links_ << " links"; return x.str(); } // ------------------------------------------------------------------- // Storage: 3. Data // ------------------------------------------------------------------- private: // Pointer to the start of the data Type* data_; // Number of elements allocated Index n_; // Number of links to the storage object allowing for arrays and // array slices to point to the same data. If this falls to zero // the Storage object will destruct itself #ifdef ADEPT_STORAGE_THREAD_SAFE // If multiple threads are to simultaneously read subsets of this // array then accesses to the reference counter must be made // atomic std::atomic n_links_; #else int n_links_; #endif // For active variables, this s the gradient index of the first // element. It would be better to only store this if Type is // floating point. 
Index gradient_index_; }; // End of Storage class // ------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------- inline Index n_storage_objects() { return internal::n_storage_objects_created_ - internal::n_storage_objects_deleted_; } inline Index n_storage_objects_created() { return internal::n_storage_objects_created_; } inline Index n_storage_objects_deleted() { return internal::n_storage_objects_deleted_; } } // End namespace adept #endif ================================================ FILE: include/adept/UnaryOperation.h ================================================ /* UnaryOperation.h -- Unary operations on Adept expressions Copyright (C) 2014-2020 European Centre for Medium-Range Weather Forecasts Robin Hogan This file is part of the Adept library. */ #ifndef AdeptUnaryOperation_H #define AdeptUnaryOperation_H #include #include namespace adept { namespace internal { // --------------------------------------------------------------------- // SECTION 3.1: Unary operations: define UnaryOperation type // --------------------------------------------------------------------- // Unary operations derive from this class, where Op is a policy // class defining how to implement the operation, and R is the // type of the argument of the operation template class Op, class R> struct UnaryOperation : public Expression >, protected Op { static const int rank = R::rank; static const bool is_active = R::is_active && !is_same::value; static const int n_active = R::n_active; // FIX! 
Only store if active and if needed static const int n_scratch = 1 + R::n_scratch; static const int n_arrays = R::n_arrays; // Will need to modify this for sqrt: static const bool is_vectorizable = Op::is_vectorized && R::is_vectorizable; using Op::operation; using Op::operation_string; using Op::derivative; //const R& arg; typename nested_expression::type arg; UnaryOperation(const Expression& arg_) : arg(arg_.cast()) { } template bool get_dimensions_(ExpressionSize& dim) const { return arg.get_dimensions(dim); } std::string expression_string_() const { std::string str; str = operation_string(); str += "(" + arg.expression_string() + ")"; return str; } bool is_aliased_(const Type* mem1, const Type* mem2) const { return arg.is_aliased(mem1, mem2); } bool all_arrays_contiguous_() const { return arg.all_arrays_contiguous_(); } bool is_aligned_() const { return arg.is_aligned_(); } template int alignment_offset_() const { return arg.template alignment_offset_(); } template Type value_with_len_(Index i, Index len) const { return operation(arg.value_with_len(i, len)); } template void advance_location_(ExpressionSize& loc) const { arg.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return operation(arg.template value_at_location_(loc)); } template Packet packet_at_location_(const ExpressionSize& loc) const { return operation(arg.template packet_at_location_(loc)); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = operation(arg.template value_at_location_store_(loc, scratch)); } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return scratch[MyScratchNum]; } template PacketType values_at_location_(const ExpressionSize& loc) const { return operation(arg.template values_at_location_(loc)); } template typename enable_if::type values_at_location_store_(const ExpressionSize& loc, ScratchVector& 
scratch) const { return scratch[MyScratchNum] = operation(arg.template values_at_location_store_(loc, scratch)); } template typename enable_if::type values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum]; } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { arg.template calc_gradient_(stack, loc, scratch, derivative(arg.template value_stored_(loc, scratch), scratch[MyScratchNum])); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { arg.template calc_gradient_(stack, loc, scratch, multiplier*derivative(arg.template value_stored_(loc, scratch), scratch[MyScratchNum])); } template void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc, const ScratchVector >& scratch, ScratchVector >& gradients) const { arg.template calc_gradient_packet_(stack, loc, scratch, gradients, derivative(arg.template values_at_location_store_(loc, scratch), scratch[MyScratchNum])); } template void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc, const ScratchVector >& scratch, ScratchVector >& gradients, const MyType& multiplier) const { arg.template calc_gradient_packet_(stack, loc, scratch, gradients, multiplier*derivative(arg.template values_at_location_store_(loc, scratch), scratch[MyScratchNum])); } template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { arg.template set_location_(i, index); } }; // End UnaryOperation type } // End namespace internal // --------------------------------------------------------------------- // SECTION 3.2: Unary operations: define specific operations // --------------------------------------------------------------------- // We may place the overloaded mathematical functions in the global // namespace provided that a using declaration enables the std:: // version of the function to be located #define 
ADEPT_DEF_UNARY_FUNC(NAME, FUNC, RAWFUNC, STRING, DERIVATIVE, \ ISVEC) \ namespace internal { \ template \ struct NAME { \ static const bool is_operator = false; \ static const bool is_vectorized = ISVEC; \ const char* operation_string() const { return STRING; } \ template \ T operation(const T& val) const { \ using RAWFUNC; \ return FUNC(val); \ } \ Type derivative(const Type& val, const Type& result) const { \ using std::sin; \ using std::cos; \ using std::sqrt; \ using std::cosh; \ using std::sinh; \ using std::exp; \ return DERIVATIVE; \ } \ Type fast_sqr(Type val) const { return val*val; } \ }; \ } /* End namespace internal */ \ template \ inline \ adept::internal::UnaryOperation \ FUNC(const adept::Expression& r) { \ return adept::internal::UnaryOperation(r.cast()); \ } // Functions y(x) whose derivative depends on the argument of the // function, i.e. dy(x)/dx = f(x) ADEPT_DEF_UNARY_FUNC(Log, log, std::log, "log", 1.0/val, false) ADEPT_DEF_UNARY_FUNC(Log10, log10, std::log10, "log10", 0.43429448190325182765/val, false) ADEPT_DEF_UNARY_FUNC(Sin, sin, std::sin, "sin", cos(val), false) ADEPT_DEF_UNARY_FUNC(Cos, cos, std::cos, "cos", -sin(val), false) ADEPT_DEF_UNARY_FUNC(Tan, tan, std::tan, "tan", 1.0/fast_sqr(cos(val)), false) ADEPT_DEF_UNARY_FUNC(Asin, asin, std::asin, "asin", 1.0/sqrt(1.0-val*val), false) ADEPT_DEF_UNARY_FUNC(Acos, acos, std::acos, "acos", -1.0/sqrt(1.0-val*val), false) ADEPT_DEF_UNARY_FUNC(Atan, atan, std::atan, "atan", 1.0/(1.0+val*val), false) ADEPT_DEF_UNARY_FUNC(Sinh, sinh, std::sinh, "sinh", cosh(val), false) ADEPT_DEF_UNARY_FUNC(Cosh, cosh, std::cosh, "cosh", sinh(val), false) ADEPT_DEF_UNARY_FUNC(Abs, abs, std::abs, "abs", ((val>0.0)-(val<0.0)), false) ADEPT_DEF_UNARY_FUNC(Fabs, fabs, std::fabs, "fabs", ((val>0.0)-(val<0.0)), false) // Functions y(x) whose derivative depends on the result of the // function, i.e. 
dy(x)/dx = f(y) ADEPT_DEF_UNARY_FUNC(Sqrt, sqrt, std::sqrt, "sqrt", 0.5/result, true) ADEPT_DEF_UNARY_FUNC(Tanh, tanh, std::tanh, "tanh", 1.0 - result*result, false) // Adept's vectorizable exponential function ADEPT_DEF_UNARY_FUNC(Fastexp, fastexp, adept::fastexp, "fastexp", result, true) #ifdef ADEPT_FAST_EXPONENTIAL ADEPT_DEF_UNARY_FUNC(Exp, exp, adept::functions::exp, "fastexp", result, true) #else ADEPT_DEF_UNARY_FUNC(Exp, exp, std::exp, "exp", result, false) #endif // Functions with zero derivative ADEPT_DEF_UNARY_FUNC(Ceil, ceil, std::ceil, "ceil", 0.0, false) ADEPT_DEF_UNARY_FUNC(Floor, floor, std::floor, "floor", 0.0, false) // Functions defined in the std namespace in C++11 but only in the // global namespace before that #ifdef ADEPT_CXX11_FEATURES ADEPT_DEF_UNARY_FUNC(Log2, log2, std::log2, "log2", 1.44269504088896340737/val, false) ADEPT_DEF_UNARY_FUNC(Expm1, expm1, std::expm1, "expm1", exp(val), false) ADEPT_DEF_UNARY_FUNC(Exp2, exp2, std::exp2, "exp2", 0.6931471805599453094172321214581766*result, false) ADEPT_DEF_UNARY_FUNC(Log1p, log1p, std::log1p, "log1p", 1.0/(1.0+val), false) ADEPT_DEF_UNARY_FUNC(Asinh, asinh, std::asinh, "asinh", 1.0/sqrt(val*val+1.0), false) ADEPT_DEF_UNARY_FUNC(Acosh, acosh, std::acosh, "acosh", 1.0/sqrt(val*val-1.0), false) ADEPT_DEF_UNARY_FUNC(Atanh, atanh, std::atanh, "atanh", 1.0/(1.0-val*val), false) ADEPT_DEF_UNARY_FUNC(Erf, erf, std::erf, "erf", 1.12837916709551*exp(-val*val), false) ADEPT_DEF_UNARY_FUNC(Erfc, erfc, std::erfc, "erfc", -1.12837916709551*exp(-val*val), false) ADEPT_DEF_UNARY_FUNC(Cbrt, cbrt, std::cbrt, "cbrt", (1.0/3.0)/(result*result), false) ADEPT_DEF_UNARY_FUNC(Round, round, std::round, "round", 0.0, false) ADEPT_DEF_UNARY_FUNC(Trunc, trunc, std::trunc, "trunc", 0.0, false) ADEPT_DEF_UNARY_FUNC(Rint, rint, std::rint, "rint", 0.0, false) ADEPT_DEF_UNARY_FUNC(Nearbyint,nearbyint,std::nearbyint,"nearbyint",0.0, false) #else ADEPT_DEF_UNARY_FUNC(Log2, log2, ::log2, "log2", 1.44269504088896340737/val, false) 
ADEPT_DEF_UNARY_FUNC(Expm1, expm1, ::expm1, "expm1", exp(val), false) ADEPT_DEF_UNARY_FUNC(Exp2, exp2, ::exp2, "exp2", 0.6931471805599453094172321214581766*result, false) ADEPT_DEF_UNARY_FUNC(Log1p, log1p, ::log1p, "log1p", 1.0/(1.0+val), false) ADEPT_DEF_UNARY_FUNC(Asinh, asinh, ::asinh, "asinh", 1.0/sqrt(val*val+1.0), false) ADEPT_DEF_UNARY_FUNC(Acosh, acosh, ::acosh, "acosh", 1.0/sqrt(val*val-1.0), false) ADEPT_DEF_UNARY_FUNC(Atanh, atanh, ::atanh, "atanh", 1.0/(1.0-val*val), false) ADEPT_DEF_UNARY_FUNC(Erf, erf, ::erf, "erf", 1.12837916709551*exp(-val*val), false) ADEPT_DEF_UNARY_FUNC(Erfc, erfc, ::erfc, "erfc", -1.12837916709551*exp(-val*val), false) ADEPT_DEF_UNARY_FUNC(Cbrt, cbrt, ::cbrt, "cbrt", (1.0/3.0)/(result*result), false) ADEPT_DEF_UNARY_FUNC(Round, round, ::round, "round", 0.0, false) ADEPT_DEF_UNARY_FUNC(Trunc, trunc, ::trunc, "trunc", 0.0, false) ADEPT_DEF_UNARY_FUNC(Rint, rint, ::rint, "rint", 0.0, false) ADEPT_DEF_UNARY_FUNC(Nearbyint,nearbyint,::nearbyint,"nearbyint",0.0, false) #endif //#undef ADEPT_DEF_UNARY_FUNC #define ADEPT_DEF_UNARY_OP(NAME, FUNC, RAWFUNC, STRING, DERIVATIVE, \ ISVEC) \ namespace internal { \ template \ struct NAME { \ static const bool is_operator = false; \ static const bool is_vectorized = ISVEC; \ const char* operation_string() const { return STRING; } \ template \ T operation(const T& val) const { \ return RAWFUNC(val); \ } \ Type derivative(const Type& val, const Type& result) const { \ return DERIVATIVE; \ } \ Type fast_sqr(Type val) { return val*val; } \ }; \ } /* End namespace internal */ \ template \ inline \ adept::internal::UnaryOperation \ FUNC(const adept::Expression& r) { \ return adept::internal::UnaryOperation(r.cast()); \ } // Operators ADEPT_DEF_UNARY_OP(UnaryPlus, operator+, +, "+", 1.0, true) ADEPT_DEF_UNARY_OP(UnaryMinus, operator-, -, "-", -1.0, true) ADEPT_DEF_UNARY_OP(Not, operator!, !, "!", 0.0, false) // --------------------------------------------------------------------- // SECTION 3.4: Unary 
operations: transpose function [DELETED] // --------------------------------------------------------------------- // --------------------------------------------------------------------- // SECTION 3.5: Unary operations: returning boolean expression // --------------------------------------------------------------------- namespace internal { // Unary operations returning bool derive from this class, where // Op is a policy class defining how to implement the operation, // and R is the type of the argument of the operation template class Op, class R> struct UnaryBoolOperation : public Expression >, protected Op { static const int rank = R::rank; static const bool is_active = false; static const int n_active = 0; static const int n_scratch = 0; static const int n_arrays = R::n_arrays; using Op::operation; using Op::operation_string; const R& arg; UnaryBoolOperation(const Expression& arg_) : arg(arg_.cast()) { } template bool get_dimensions_(ExpressionSize& dim) const { return arg.get_dimensions(dim); } std::string expression_string_() const { std::string str; str = operation_string(); str += "(" + static_cast(&arg)->expression_string() + ")"; return str; } bool is_aliased_(const bool* mem1, const bool* mem2) const { return false; } bool all_arrays_contiguous_() const { return arg.all_arrays_contiguous_(); } template int alignment_offset_() const { return arg.template alignment_offset_(); } template Type value_with_len_(Index i, Index len) const { return operation(arg.value_with_len(i, len)); } template void advance_location_(ExpressionSize& loc) const { arg.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return operation(arg.template value_at_location_(loc)); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = operation(arg.template value_at_location_store_(loc, scratch)); } template Type value_stored_(const ExpressionSize& loc, const 
ScratchVector& scratch) const { return scratch[MyScratchNum]; } template PacketType values_at_location_(const ExpressionSize& loc) const { return operation(arg.template values_at_location_(loc)); } template PacketType values_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return operation(arg.template values_at_location_(loc)); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } template void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc, const ScratchVector >& scratch, ScratchVector >& gradients) const {} template void calc_gradient_packet_(Stack& stack, const ExpressionSize& loc, const ScratchVector >& scratch, ScratchVector >& gradients, const MyType& multiplier) const {} template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { arg.template set_location_(i, index); } }; } // End namespace internal #define ADEPT_DEF_UNARY_BOOL_FUNC(NAME, FUNC, RAWFUNC) \ namespace internal { \ template \ struct NAME { \ const char* operation_string() const { return #FUNC; } \ bool operation(const Type& val) const { \ using RAWFUNC; \ return FUNC(val); /* RAWFUNC(val); */ \ } \ }; \ } /* End namespace internal */ \ template \ inline \ adept::internal::UnaryBoolOperation \ FUNC(const adept::Expression& r){ \ return adept::internal::UnaryBoolOperation(r.cast()); \ } ADEPT_DEF_UNARY_BOOL_FUNC(IsNan, isnan, std::isnan) ADEPT_DEF_UNARY_BOOL_FUNC(IsInf, isinf, std::isinf) ADEPT_DEF_UNARY_BOOL_FUNC(IsFinite, isfinite, std::isfinite) //#undef ADEPT_DEF_UNARY_BOOL_FUNC } /* End namespace adept */ #endif ================================================ FILE: include/adept/array_shortcuts.h ================================================ /* array_shortcuts.h -- Definitions of "shortcut" typedefs for array types Copyright (C) 
2015-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptArrayShortcuts_H #define AdeptArrayShortcuts_H #include #include #include namespace adept { // --------------------------------------------------------------------- // Pretty typedefs to avoid the need for template arguments // --------------------------------------------------------------------- typedef Array<1> Vector; typedef Array<2> Matrix; typedef Array<3> Array3; // Deprecated typedef Array<3> Array3D; typedef Array<4> Array4D; typedef Array<5> Array5D; typedef Array<6> Array6D; typedef Array<7> Array7D; typedef Array<1,Index> IntVector; typedef Array<2,Index> IntMatrix; typedef Array<3,Index> IntArray3; // Deprecated typedef Array<3,Index> IntArray3D; typedef Array<1,int> intVector; typedef Array<2,int> intMatrix; typedef Array<3,int> intArray3; // Deprecated typedef Array<3,int> intArray3D; typedef Array<4,int> intArray4D; typedef Array<5,int> intArray5D; typedef Array<6,int> intArray6D; typedef Array<7,int> intArray7D; typedef Array<1,bool> boolVector; typedef Array<2,bool> boolMatrix; typedef Array<3,bool> boolArray3; // Deprecated typedef Array<3,bool> boolArray3D; typedef Array<4,bool> boolArray4D; typedef Array<5,bool> boolArray5D; typedef Array<6,bool> boolArray6D; typedef Array<7,bool> boolArray7D; typedef Array<1,float> floatVector; typedef Array<2,float> floatMatrix; typedef Array<3,float> floatArray3; // Deprecated typedef Array<3,float> floatArray3D; typedef Array<4,float> floatArray4D; typedef Array<5,float> floatArray5D; typedef Array<6,float> floatArray6D; typedef Array<7,float> floatArray7D; typedef SpecialMatrix, false> SquareMatrix; typedef SpecialMatrix, false> DiagMatrix; typedef SpecialMatrix, false> TridiagMatrix; typedef SpecialMatrix, false> PentadiagMatrix; typedef SpecialMatrix, false> SymmMatrix; typedef SpecialMatrix, false> LowerMatrix; typedef SpecialMatrix, false> UpperMatrix; typedef 
FixedArray Vector2; typedef FixedArray Vector3; typedef FixedArray Vector4; typedef FixedArray Matrix22; typedef FixedArray Matrix33; typedef FixedArray Matrix44; // If automatic differentiation is turned off then aVector and // friends become identical to their inactive counterparts #ifdef ADEPT_NO_AUTOMATIC_DIFFERENTIATION #define ADEPT_IS_ACTIVE false #else #define ADEPT_IS_ACTIVE true #endif typedef Array<1,Real,ADEPT_IS_ACTIVE> aVector; typedef Array<2,Real,ADEPT_IS_ACTIVE> aMatrix; typedef Array<3,Real,ADEPT_IS_ACTIVE> aArray3; // Deprecated typedef Array<3,Real,ADEPT_IS_ACTIVE> aArray3D; typedef Array<4,Real,ADEPT_IS_ACTIVE> aArray4D; typedef Array<5,Real,ADEPT_IS_ACTIVE> aArray5D; typedef Array<6,Real,ADEPT_IS_ACTIVE> aArray6D; typedef Array<7,Real,ADEPT_IS_ACTIVE> aArray7D; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aSquareMatrix; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aDiagMatrix; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aTridiagMatrix; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aPentadiagMatrix; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aSymmMatrix; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aLowerMatrix; typedef SpecialMatrix, ADEPT_IS_ACTIVE> aUpperMatrix; typedef FixedArray aVector2; typedef FixedArray aVector3; typedef FixedArray aVector4; typedef FixedArray aMatrix22; typedef FixedArray aMatrix33; typedef FixedArray aMatrix44; #undef ADEPT_IS_ACTIVE } // End namespace adept #endif ================================================ FILE: include/adept/base.h ================================================ /* base.h -- Basic definitions Copyright (C) 2012-2014 University of Reading Copyright (C) 2015-2021 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
*/ #ifndef AdeptBase_H #define AdeptBase_H 1 #include // --------------------------------------------------------------------- // 0: Adept version number // --------------------------------------------------------------------- // The version of the Adept library is specified both as a string and // an integer #define ADEPT_VERSION 20100 #define ADEPT_VERSION_STR "2.1" // --------------------------------------------------------------------- // 1: Defines not requiring a library recompile // --------------------------------------------------------------------- // The following can either be changed here, or defined just // before including this header file in your code, or defined using the // -Dxxx compiler option. These options do not need the library to be // recompiled. // A globally accessible stack needs to be present for arithmetic // statements to access; by default this is thread safe but if you // know you are running a single-threaded application then slightly // faster performance may be achieved by defining this. Note that in // section 4 of this header file, ADEPT_STACK_THREAD_UNSAFE is // explicitly defined on the Mac OS platform, since the executable // format used typically does not support thread-local storage. //#define ADEPT_STACK_THREAD_UNSAFE 1 // Define this to check whether the "multiplier" is zero before it is // placed on the operation stack. This makes the forward pass slower // and the reverse pass slightly faster, and is only worthwhile if // many reverse passes will be carried out per forward pass (or if you // have good reason to believe many variables in your code are zero). // #define ADEPT_REMOVE_NULL_STATEMENTS 1 // If using the same code for both forward-only and // forward-and-reverse calculations, then it is useful to be able to // dynamically control whether or not gradient information is computed // by expressions in the forward pass using the pause_recording() and // continue_recording() functions.
To enable this feature uncomment // the following, but note that it slows down the forward pass a // little. //#define ADEPT_RECORDING_PAUSABLE 1 // Initialize real types to signaling NaN or zero //#define ADEPT_INIT_REAL_SNAN 1 //#define ADEPT_INIT_REAL_ZERO 1 // Often when you first convert a code for automatic differentiation // the gradients computed contain NaNs or infinities: uncommenting the // following will check for these and throw an error when they are // found, so that by running the program in a debugger and looking at // the backtrace, you can locate the source. //#define ADEPT_TRACK_NON_FINITE_GRADIENTS 1 // If this is defined then each mathematical operation does not // involve a check whether more memory needs to be allocated; rather // the user first specifies how much memory to allocate to hold the // entire algorithm via the preallocate_statements and // preallocate_operations functions. This is a little faster, but is // obviously risky if you don't anticipate correctly how much memory // will be needed. //#define ADEPT_MANUAL_MEMORY_ALLOCATION 1 // Do we check array bounds when indexing arrays? //#define ADEPT_BOUNDS_CHECKING 1 // Do we disable dimension checking when assigning an array expression // to another array? //#define ADEPT_NO_DIMENSION_CHECKING 1 // Do we disable automatic alias checking in array operations? //#define ADEPT_NO_ALIAS_CHECKING 1 // Does adept::exp when applied to Adept types such as arrays invoke a // faster vectorizable exponential function? This is not bit // reproducible with "exp" in the standard library, but the faster // function is always available as adept::fastexp (and this also works // on scalars). Note that when applied to an Adept type, a simple // "exp" selects the function from the adept namespace. 
//#define ADEPT_FAST_EXPONENTIAL 1 // The following will define the adept::exp function for the scalar // types "float" and "double" to call the faster exponential function, // bit reproducible with the vectorizable one above. However, this // can cause a namespace clash as some C header files import "exp" // outside of any namespace. Alternatively you can use adept::fastexp // on scalars. //#define ADEPT_FAST_SCALAR_EXPONENTIAL 1 // A shortcut for faster execution that does not change the behaviour // of single-threaded bug-free code that uses the "eval" function in // case of aliasing. ADEPT_FAST_EXPONENTIAL changes results so is not // activated with ADEPT_FAST. #ifdef ADEPT_FAST #define ADEPT_STACK_THREAD_UNSAFE 1 #define ADEPT_NO_DIMENSION_CHECKING 1 #define ADEPT_NO_ALIAS_CHECKING 1 #endif // The compiler option -ffast-math turns on __FAST_MATH__ and allows // for optimizations that may not be bit-reproducible or do all the // normal error checking - Adept's fast exponential falls into this // category. #ifdef __FAST_MATH__ #define ADEPT_FAST_EXPONENTIAL 1 #endif // The initial size of the stacks, which can be grown if required #ifndef ADEPT_INITIAL_STACK_LENGTH #define ADEPT_INITIAL_STACK_LENGTH 1048576 #endif // The statement and operation stacks #ifndef ADEPT_STACK_BLOCK_LENGTH #define ADEPT_STACK_BLOCK_LENGTH 1048576 #endif //#define ADEPT_SUPPORT_HUGE_ARRAYS 1 // Since subsetting an array causes a modification to the reference // counter in the underlying storage object, multiple threads // subsetting the same array can cause clashes unless the reference // counter is protected by a mutex. 
This is possible on C++11 by // making the reference counter of type std::atomic, enabled by // defining the following: //#define ADEPT_STORAGE_THREAD_SAFE // --------------------------------------------------------------------- // 2: Defines requiring a library recompile // --------------------------------------------------------------------- // The "stack" containing derivative information can be implemented in // two ways: if ADEPT_STACK_STORAGE_STL is defined then C++ STL // containers are used, otherwise dynamically allocated arrays are // used. Experience says that dynamically allocated arrays are faster. //#define ADEPT_STACK_STORAGE_STL 1 // The number of rows/columns of a Jacobian that are calculated at // once. The optimum value depends on platform, the size of your // Jacobian and the number of OpenMP threads available. #ifndef ADEPT_MULTIPASS_SIZE //#define ADEPT_MULTIPASS_SIZE 1 //#define ADEPT_MULTIPASS_SIZE 2 #define ADEPT_MULTIPASS_SIZE 4 //#define ADEPT_MULTIPASS_SIZE 8 //#define ADEPT_MULTIPASS_SIZE 15 //#define ADEPT_MULTIPASS_SIZE 16 //#define ADEPT_MULTIPASS_SIZE 32 //#define ADEPT_MULTIPASS_SIZE 64 #endif // If ADEPT_MULTIPASS_SIZE > ADEPT_MULTIPASS_SIZE_ZERO_CHECK then the // Jacobian calculation will try to remove redundant loops involving // zeros; note that this may inhibit auto-vectorization #define ADEPT_MULTIPASS_SIZE_ZERO_CHECK 64 #define PACKET_SIZE_ZERO_CHECK 64 // By default the precision of differentiated expressions is "double". // To override this, define ADEPT_REAL_TYPE_SIZE to 4 (float), 8 // (double) or 16 (long double). Note that if you specify 16 but on // your system "long double" is actually the same as double, then the // code will fail to compile. //#define ADEPT_REAL_TYPE_SIZE 8 // Thread-local storage is used for the global Stack pointer to ensure // thread safety. 
In pre-C++11 compilers, thread-local variables are // declared in different ways by different compilers, the most common // ones being detected in section 4 below. Some platforms // (particularly some Mac platforms) do not implement thread-local // storage, and therefore on Mac thread-local storage is disabled. If // you want to manually specify how thread-local storage is declared, // you may do it here. If thread-local storage is not available on // your platform but is not detected in section 4, and consequently // you cannot get the code to compile, then you can make an empty // declaration here. //#define ADEPT_THREAD_LOCAL thread_local // Define the following if you wish to use OpenMP to accelerate array // expressions //#define ADEPT_OPENMP_ARRAY_OPERATIONS 1 // This cannot be changed without rewriting the Adept library #define ADEPT_MAX_ARRAY_DIMENSIONS 7 // --------------------------------------------------------------------- // 4: Miscellaneous // --------------------------------------------------------------------- // Various C++11 features #if __cplusplus > 199711L // We can optimize the returning of Arrays from functions with move // semantics: #define ADEPT_MOVE_SEMANTICS 1 // Other C++11 features such as initializer lists, thread_local // keyword, extra mathematical functions etc: #define ADEPT_CXX11_FEATURES 1 #elif defined(_MSVC_LANG) // Microsoft will only update __cplusplus when all C++11 features are // included #if _MSVC_LANG > 199711L #define ADEPT_MOVE_SEMANTICS 1 #define ADEPT_CXX11_FEATURES 1 #endif #endif // Check C++11 is being used if thread-safe array storage is required #ifdef ADEPT_STORAGE_THREAD_SAFE #ifndef ADEPT_CXX11_FEATURES #error "Thread-safe array storage is only available with C++11" #endif #endif // The following attempt to align the data to facilitate SSE2 // vectorization did not work so is disabled #ifdef __GNUC__ //#define ADEPT_SSE2_ALIGNED __attribute__ ((aligned (16))) #define ADEPT_SSE2_ALIGNED #else #define 
ADEPT_SSE2_ALIGNED #endif // The way thread-local variables are specified pre-C++11 is compiler // specific. You can specify this manually by defining the // ADEPT_THREAD_LOCAL preprocessor variable in the previous section, // otherwise it is defined here depending on your compiler #ifndef ADEPT_THREAD_LOCAL #ifdef __APPLE__ #ifdef __GNUC__ // GNU C++11 compiler on Mac should support thread_local #ifdef ADEPT_CXX11_FEATURES #define ADEPT_THREAD_LOCAL thread_local #endif #elif defined(__has_feature) // Clang supports "__has_feature": check if thread_local is // available #if __has_feature(cxx_thread_local) #define ADEPT_THREAD_LOCAL thread_local #endif #endif // When thread_local is unavailable we turn it off and provide a // blank definition of ADEPT_THREAD_LOCAL. #ifndef ADEPT_THREAD_LOCAL #define ADEPT_STACK_THREAD_UNSAFE 1 #define ADEPT_THREAD_LOCAL #endif #elif defined(ADEPT_CXX11_FEATURES) // C++11 has thread_local as part of the language, and should be // supported on non-Mac C++11 platforms #define ADEPT_THREAD_LOCAL thread_local #elif defined(_MSC_VER) // Microsoft C++98 has a different way to specify thread-local // storage from the GCC/Intel/Sun/IBM compilers. #define ADEPT_THREAD_LOCAL __declspec(thread) #else // The following should work on GCC/Intel/Sun/IBM C++98 compilers #define ADEPT_THREAD_LOCAL __thread #endif #endif // If we use OpenMP to parallelize array expressions then some // variables local to active operation structures (Multiply etc) need // to be made thread-local #ifdef ADEPT_OPENMP_ARRAY_OPERATIONS #define ADEPT_THREAD_LOCAL_IF_OPENMP ADEPT_THREAD_LOCAL #else #define ADEPT_THREAD_LOCAL_IF_OPENMP #endif // Currently the design of the stack means that automatic // differentiation of matrix multiplication is very inefficient. 
A // future version of Adept will redesign the stack to store directives // enabling efficient implementation of the derivative of a matrix // multiplication, and this will be applicable to different types of // matrix (dense, symmetric, banded, upper and lower). But for now, // only differentiation of dense active matrices // (i.e. Array<2,Real,true>) is implemented. Therefore other types // of active matrix need to be converted to this type before they can // be used in matrix multiplication. #define ADEPT_ONLY_DIFFERENTIATE_DENSE_MATRIX_MULTIPLICATION 1 // To find bugs it can be useful to initialize arrays to signaling // NaNs, in which case ADEPT_INIT_REAL is set and used internally #ifdef ADEPT_INIT_REAL_SNAN #define ADEPT_INIT_REAL std::numeric_limits::signaling_NaN() #elif defined(ADEPT_INIT_REAL_ZERO) #define ADEPT_INIT_REAL 0.0 #endif // --------------------------------------------------------------------- // 5: Define basic floating-point and integer types // --------------------------------------------------------------------- namespace adept { // An older version of Adept used ADEPT_FLOATING_POINT_TYPE to // define alternative underlying types for "Real", but unfortunately // the preprocessor cannot check if a preprocessor variable is of // type "long double", so a numerical value is used instead #ifdef ADEPT_FLOATING_POINT_TYPE #undef ADEPT_FLOATING_POINT_TYPE #error ADEPT_FLOATING_POINT_TYPE is deprecated: use ADEPT_REAL_TYPE_SIZE instead #endif #ifndef ADEPT_REAL_TYPE_SIZE #define ADEPT_REAL_TYPE_SIZE 8 #endif #if ADEPT_REAL_TYPE_SIZE == 4 typedef float Real; #elif ADEPT_REAL_TYPE_SIZE == 8 typedef double Real; #elif ADEPT_REAL_TYPE_SIZE == 16 typedef long double Real; #else #undef ADEPT_REAL_TYPE_SIZE #error If defined, ADEPT_REAL_TYPE_SIZE must be 4 (float), 8 (double) or 16 (long double) #endif // By default sizes of arrays, indices to them, and indices in the // automatic differentiation stack are stored as 4-byte integers, // but for very large 
arrays and algorithms, larger types may be // needed. Remember that on 32-bit platforms this will have no // effect. #ifdef ADEPT_SUPPORT_HUGE_ARRAYS typedef std::size_t uIndex; // Unsigned typedef std::ptrdiff_t Index; // Signed #else // typedef unsigned int uIndex; typedef int uIndex; typedef int Index; #endif // --------------------------------------------------------------------- // 6: Disable stupid warnings // --------------------------------------------------------------------- #ifdef __INTEL_COMPILER // "type qualifiers are meaningless here" #pragma warning disable 2536 #elif defined(_MSC_VER) // "multiple copy constructors specified" #pragma warning( disable : 4521 ) #endif } // End namespace adept #endif ================================================ FILE: include/adept/contiguous_matrix.h ================================================ /* contiguous_matrix.h -- Return matrix with contiguous storage Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptContiguousMatrix_H #define AdeptContiguousMatrix_H 1 #include namespace adept { namespace internal { // If for input into BLAS or LAPACK a matrix is required to have // one dimension contiguous and increasing in memory, then call // this function: if the matrix has this property then the // returned matrix in "out" will be linked to the input matrix; // otherwise, "out" will be a copy of "in" but satisfying this // condition. The returned "order" is ROW_MAJOR or COL_MAJOR // stating the storage type of the returned matrix. 
template MatrixStorageOrder contiguous_matrix(Array<2,T,IsActive>& in, Array<2,T,IsActive>& out, Index& stride) { MatrixStorageOrder order = ROW_MAJOR; if (in.empty()) { throw(invalid_operation("Input matrix must not be empty")); } if (in.dimension(1) == 1) { out.link(in); stride = in.offset(0); } else if (in.dimension(0) == 1) { order = COL_MAJOR; out.link(in); stride = in.offset(1); } else { out.resize_row_major(in.dimensions()); out = in; stride = in.offset(0); } return order; } // As contiguous_matrix but checks that the input matrix is square template MatrixStorageOrder contiguous_square_matrix(Array<2,T,IsActive>& in, Array<2,T,IsActive>& out, Index& stride) { if (in.dimension(0) != in.dimension(1)) { throw(invalid_operation("Square matrix required")); } return contiguous_matrix(in, out, stride); } } } #endif ================================================ FILE: include/adept/cppblas.h ================================================ /* cppblas.h -- C++ interface to BLAS functions Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
This file provides a C++ interface to selected Level-2 and -3 BLAS
   functions in which the precision of the arguments (float versus
   double) is inferred via overloading
*/

#ifndef AdeptCppBlas_H
#define AdeptCppBlas_H 1

namespace adept {

namespace internal {

// Types of the option arguments taken by the cppblas_* functions:
// the storage order is a boolean flag while the remaining options are
// single characters
typedef bool BLAS_ORDER;
typedef char BLAS_TRANSPOSE;
typedef char BLAS_UPLO;
typedef char BLAS_SIDE;

// Permitted values of the option arguments
static const BLAS_ORDER     BlasRowMajor  = false;
static const BLAS_ORDER     BlasColMajor  = true;
static const BLAS_TRANSPOSE BlasNoTrans   = 'N';
static const BLAS_TRANSPOSE BlasTrans     = 'T';
static const BLAS_TRANSPOSE BlasConjTrans = 'C';
static const BLAS_UPLO      BlasUpper     = 'U';
static const BLAS_UPLO      BlasLower     = 'L';
static const BLAS_SIDE      BlasLeft      = 'L';
static const BLAS_SIDE      BlasRight     = 'R';

// Each ADEPT_DEFINE_xxx(T) macro below declares (but does not define)
// one overload of the corresponding cppblas_xxx function for element
// type T.  It is expanded for double and float and then undefined so
// that it does not leak out of this header.

// Matrix-matrix multiplication for general dense matrices
#define ADEPT_DEFINE_GEMM(T)                                    \
    void cppblas_gemm(const BLAS_ORDER Order,                   \
                      const BLAS_TRANSPOSE TransA,              \
                      const BLAS_TRANSPOSE TransB,              \
                      const int M, const int N,                 \
                      const int K, const T alpha, const T *A,   \
                      const int lda, const T *B, const int ldb, \
                      const T beta, T *C, const int ldc);
ADEPT_DEFINE_GEMM(double)
ADEPT_DEFINE_GEMM(float)
#undef ADEPT_DEFINE_GEMM

// Matrix-vector multiplication for a general dense matrix
#define ADEPT_DEFINE_GEMV(T)                                    \
    void cppblas_gemv(const BLAS_ORDER order,                   \
                      const BLAS_TRANSPOSE TransA,              \
                      const int M, const int N,                 \
                      const T alpha, const T *A, const int lda, \
                      const T *X, const int incX, const T beta, \
                      T *Y, const int incY);
ADEPT_DEFINE_GEMV(double)
ADEPT_DEFINE_GEMV(float)
#undef ADEPT_DEFINE_GEMV

// Matrix-matrix multiplication where matrix A is symmetric
#define ADEPT_DEFINE_SYMM(T)                                    \
    void cppblas_symm(const BLAS_ORDER Order,                   \
                      const BLAS_SIDE Side,                     \
                      const BLAS_UPLO Uplo,                     \
                      const int M, const int N,                 \
                      const T alpha, const T *A, const int lda, \
                      const T *B, const int ldb, const T beta,  \
                      T *C, const int ldc);
ADEPT_DEFINE_SYMM(double)
ADEPT_DEFINE_SYMM(float)
#undef ADEPT_DEFINE_SYMM

// Matrix-vector multiplication where the matrix is symmetric
#define ADEPT_DEFINE_SYMV(T)                                    \
    void cppblas_symv(const BLAS_ORDER order,                   \
                      const BLAS_UPLO Uplo,                     \
                      const int N, const T alpha, const T *A,   \
                      const int lda, const T *X, const int incX,\
                      const T beta, T *Y, const int incY);
ADEPT_DEFINE_SYMV(double)
ADEPT_DEFINE_SYMV(float)
#undef ADEPT_DEFINE_SYMV

// Matrix-vector multiplication for a general band matrix
#define ADEPT_DEFINE_GBMV(T)                                    \
    void cppblas_gbmv(const BLAS_ORDER order,                   \
                      const BLAS_TRANSPOSE TransA,              \
                      const int M, const int N,                 \
                      const int KL, const int KU, const T alpha,\
                      const T *A, const int lda, const T *X,    \
                      const int incX, const T beta, T *Y,       \
                      const int incY);
ADEPT_DEFINE_GBMV(double)
ADEPT_DEFINE_GBMV(float)
#undef ADEPT_DEFINE_GBMV

} // End namespace internal

} // End namespace adept

#endif

================================================
FILE: include/adept/eval.h
================================================
/* eval.h -- Convert expression to array to avoid aliasing issues

   Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts

   Author: Robin Hogan

   This file is part of the Adept library.
*/ #ifndef AdeptEval_H #define AdeptEval_H #include namespace adept { // Copy an expression to an Array of the same rank, type and // activeness template typename internal::enable_if<(E::rank > 0), Array >::type eval(const Expression& e) { Array a; a = e.cast(); return a; } // Equivalent for scalar expressions; not really needed /* template typename internal::enable_if::type eval(const Expression& e) { return static_cast(e); } template typename internal::enable_if >::type eval(const Expression& e) { return static_cast >(e); } */ } // End namespace adept #endif ================================================ FILE: include/adept/exception.h ================================================ /* exception.h -- Exceptions thrown by Adept library Copyright (C) 2012-2014 University of Reading Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. Adept functions can throw exceptions that are all derived either from the adept::autodiff_exception or adept::array_exception types, themselves inherited from the adept::exception type. All implement the "what()" function to return an error message. 
*/ #ifndef AdeptException_H #define AdeptException_H 1 #include #include #include namespace adept { // ------------------------------------------------------------------- // adept::exception class from which all others are derived // ------------------------------------------------------------------- class exception : public std::exception { public: virtual const char* what() const throw() { return message_.c_str(); } virtual ~exception() throw() { } protected: std::string message_; }; class feature_not_available : public adept::exception { public: feature_not_available(const std::string& message = "Feature not available") { message_ = message; } }; // ------------------------------------------------------------------- // autodiff_exception and child classes // ------------------------------------------------------------------- // The autodiff_exception type is only used as a base for more // specific exceptions class autodiff_exception : public adept::exception { }; // Now we define the various specific autodiff exceptions that can // be thrown. 
class gradient_out_of_range : public autodiff_exception { public: gradient_out_of_range(const std::string& message = "Gradient index out of range: probably aReal objects have been created after a set_gradient(s) call") { message_ = message; } }; class gradients_not_initialized : public autodiff_exception { public: gradients_not_initialized(const std::string& message = "Gradients not initialized: at least one call to set_gradient(s) is needed before a forward or reverse pass") { message_ = message; } }; class stack_already_active : public autodiff_exception { public: stack_already_active(const std::string& message = "Attempt to activate an adept::Stack when one is already active in this thread") { message_ = message; } }; class dependents_or_independents_not_identified : public autodiff_exception { public: dependents_or_independents_not_identified(const std::string& message = "Dependent or independent variables not identified before a Jacobian computation") { message_ = message; } }; class wrong_gradient : public autodiff_exception { public: wrong_gradient(const std::string& message = "Wrong gradient: append_derivative_dependence called on a different aReal object from the most recent add_derivative_dependence call") { message_ = message; } }; class non_finite_gradient : public autodiff_exception { public: non_finite_gradient(const std::string& message = "A non-finite gradient has been computed") { message_ = message; } }; // ------------------------------------------------------------------- // array_exception and child classes // ------------------------------------------------------------------- // The array_exception type class array_exception : public adept::exception { public: array_exception(const std::string& message = "A misuse of arrays occurred") { message_ = message; } }; class size_mismatch : public array_exception { public: size_mismatch(const std::string& message = "Array sizes do not match in array expression") { message_ = message; } }; class 
inner_dimension_mismatch : public array_exception { public: inner_dimension_mismatch(const std::string& message = "Inner dimensions don't agree in matrix multiplication") { message_ = message; } }; class empty_array : public array_exception { public: empty_array(const std::string& message = "Use of empty array where non-empty array required") { message_ = message; } }; class invalid_dimension : public array_exception { public: invalid_dimension(const std::string& message = "Attempt to create array with invalid dimension") { message_ = message; } }; class index_out_of_bounds : public array_exception { public: index_out_of_bounds(const std::string& message = "Array index is out of bounds") { message_ = message; } }; class invalid_operation : public array_exception { public: invalid_operation(const std::string& message = "Operation not permitted for this type of array") { message_ = message; } }; class matrix_ill_conditioned : public array_exception { public: matrix_ill_conditioned(const std::string& message = "Matrix ill conditioned") { message_ = message; } }; class fortran_interoperability_error : public array_exception { public: fortran_interoperability_error(const std::string& message = "Fortran interoperability error") { message_ = message; } }; // ------------------------------------------------------------------- // optimization_exception // ------------------------------------------------------------------- // The optimization_exception type class optimization_exception : public adept::exception { public: optimization_exception(const std::string& message = "Optimization/minimization error") { message_ = message; } }; // ------------------------------------------------------------------- // Provide location of where exception was thrown // ------------------------------------------------------------------- // The following enables the file name and line number to be reported // with something like // throw array_exception("Bad matrix" ADEPT_EXCEPTION_LOCATION) 
#define ADEPT_EXCEPTION_LOCATION \ +adept::internal::exception_location(__FILE__,__LINE__) // A string with location information to append to the error message namespace internal { inline std::string exception_location(const char* file, int line) { std::stringstream s; s << " (in " << file << ":" << line << ")"; return s.str(); } } } // End namespace adept #endif ================================================ FILE: include/adept/interp.h ================================================ /* interp.h -- 1D interpolation Copyright (C) 2015- European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptInterp_H #define AdeptInterp_H #include namespace adept { namespace internal { typedef unsigned int uint; }; // The interpolation scheme and extrapolation behaviours are passed // in as one "options" argument with a bitwise OR. The lowest four // bits specify the extrapolation policy and the remaining bits the // interpolation scheme. 
static const internal::uint ADEPT_INTERPOLATE_LINEAR = 0u; // Default static const internal::uint ADEPT_INTERPOLATE_NEAREST = (1u<<4); static const internal::uint ADEPT_EXTRAPOLATE_DEFAULT = 0u; static const internal::uint ADEPT_EXTRAPOLATE_LINEAR = 1u; // Default for linear interp static const internal::uint ADEPT_EXTRAPOLATE_CLAMP = 2u; // Default for nearest-neighbour // Return a constant for out-of-bounds inputs, or NaN if the // constant is not specified static const internal::uint ADEPT_EXTRAPOLATE_CONSTANT = 3u; // A bitwise AND of the "options" argument with one of the following // will extract the component associated with interpolation and // extrapolation namespace internal { static const internal::uint ADEPT_EXTRAPOLATE_MASK = 15; // Binary 1111 static const internal::uint ADEPT_INTERPOLATE_MASK = ~ADEPT_EXTRAPOLATE_MASK; inline void extract_interp_extrap(uint options, uint& interp_scheme, uint& extrap_policy) { interp_scheme = options & ADEPT_INTERPOLATE_MASK; extrap_policy = options & ADEPT_EXTRAPOLATE_MASK; if (interp_scheme != ADEPT_INTERPOLATE_LINEAR && interp_scheme != ADEPT_INTERPOLATE_NEAREST) { throw array_exception("Interpolation scheme not understood"); } else if (extrap_policy > ADEPT_EXTRAPOLATE_CONSTANT) { throw array_exception("Extrapolation policy not understood"); } else if (interp_scheme == ADEPT_INTERPOLATE_NEAREST && extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { throw array_exception("Linear extrapolation not available with nearest-neighbour interpolation"); } else if (extrap_policy == ADEPT_EXTRAPOLATE_DEFAULT) { if (interp_scheme == ADEPT_INTERPOLATE_LINEAR) { extrap_policy = ADEPT_EXTRAPOLATE_LINEAR; } else { extrap_policy = ADEPT_EXTRAPOLATE_CLAMP; } } } // The dimensions of an array containing the data to be // interpolated may be described either by a vector of real // numbers, or by a regular range; any other type will not // compile. A regular range (which could be expressed by a // LinSpace object) has not yet been defined. 
template struct InterpHelper { static const bool is_valid = false; }; // Specialization for a vector of real numbers template struct InterpHelper > { static const bool is_valid = is_floating_point::value; template static void interp_get_indices_weights(const Array<1,XType,false>& x, const Array<1,XiType,false>& xi, internal::uint interp_scheme, internal::uint extrap_policy, Array<1,Index>& ind0, Array<1,Real,false>& weight0, Array<1,bool>& is_valid) { if (x(1) > x(0)) { // Normal ordering; loop over points to be interpolated for (Index i = 0; i < xi.size(); ++i) { const XiType xii = xi(i); if (xii >= x(0) && xii <= x(end)) { // Point is in the range of the interpolated function Index jj = 0; while (jj < x.size()-2 && x(jj+1) < xii) { ++jj; } ind0(i) = jj; weight0(i) = (x(jj+1)-xii)/(x(jj+1)-x(jj)); } else if (xii < x(0)) { // Point is off the low end of the scale ind0(i) = 0; if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { weight0(i) = (x(1)-xii)/(x(1)-x(0)); } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { weight0(i) = 1.0; } else { is_valid(i) = false; } } else { // Point is off the high end of the scale ind0(i) = x.size()-2; if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { weight0(i) = (x(end)-xii)/(x(end)-x(end-1)); } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { weight0(i) = 0.0; } else { is_valid(i) = false; } } } } else { // Reverse ordering; loop over points to be interpolated for (Index i = 0; i < xi.size(); ++i) { const XiType xii = xi(i); if (xii <= x(0) && xii >= x(end)) { // Point is in the range of the interpolated function Index jj = x.size()-2; while (jj > 0 && x(jj) < xii) { --jj; } ind0(i) = jj; weight0(i) = (x(jj+1)-xii)/(x(jj+1)-x(jj)); } else if (xii > x(0)) { // Point is off the scale (high in x, low in index) ind0(i) = 0; if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { weight0(i) = (x(1)-xii)/(x(1)-x(0)); } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { weight0(i) = 1.0; } else { is_valid(i) = false; } } else { // Point 
is off the scale (low in x, high in index) ind0(i) = x.size()-2; if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { weight0(i) = (x(end)-xii)/(x(end)-x(end-1)); } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { weight0(i) = 0.0; } else { is_valid(i) = false; } } } } // Not very efficient implementation of nearest-neighbour // interpolation: round the weights from linear interpolation if (interp_scheme == ADEPT_INTERPOLATE_NEAREST) { weight0 = round(weight0); } } }; } // 1D interpolation: interp1(x,y,xi) interpolates to obtain values of // y (whose first dimension is at the points in vector x) // interpolated to the values in vector xi. If y has more than one // dimension then multiple values are interpolated for every point // in xi, and the returned array has a size equal to y except that // the first dimension is of the same length as xi. If the // extrapolate policy is specified and is ADEPT_EXTRAPOLATE_CLAMP // then values outside the range will be clampted at the first or // last point. If it is ADEPT_EXTRAPOLATE_CONSTANT then a constant // value will be used which can be specified as the final argument, // or is a signaling NaN by default. Otherwise, linear // extrapolation is performed (the default). Note that x and xi must // be inactive variables, but y can be active in which case the // returned array will be too. 
template Array interp(const Array<1,XType,false>& x, const Array& y, const Array<1,XiType,false>& xi, internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT, YType extrap_value = std::numeric_limits::signaling_NaN()) { ExpressionSize ans_dims = y.dimensions(); ans_dims[0] = xi.size(); Array ans(ans_dims); if (x.size() != y.size(0)) { throw(size_mismatch("Interpolation vector x must have same length of first dimension of y in interp")); } else if (x.size() == 0) { throw(size_mismatch("Interpolation from empty vectors")); } else if (x.size() == 1) { // Input arrays are at a single point: copy this point into all // output points regardless of their x coordinate for (int ii = 0; ii < xi.size(); ++ii) { ans[ii] = y[0]; } return ans; } internal::uint interp_scheme, extrap_policy; internal::extract_interp_extrap(options, interp_scheme, extrap_policy); if (x(0) < x(1)) { // Normal ordering for (Index i = 0; i < xi.size(); i++) { Real xii = xi(i); Index jmin = 0; Index jmax = x.size()-1; if (xii <= x(0)) { if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { // Extrapolate leftwards jmax = 1; } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { // Clamp at first value ans[i] = y[0]; continue; } else { ans[i] = extrap_value; continue; } } else if (xii >= x(jmax)) { if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { // Extrapolate rightwards jmin = jmax-1; } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { // Clamp at final value ans[i] = y[jmax]; continue; } else { ans[i] = extrap_value; continue; } } else { // xii lies within x // Find pair in which xi sits while (jmax > jmin+1) { Index jmid = jmin + (jmax-jmin)/2; if (xii > x(jmid)) { jmin = jmid; } else { jmax = jmid; } } } if (interp_scheme == ADEPT_INTERPOLATE_LINEAR) { // Found value: linearly interpolate. 
Note that we need // square brackets here because ans and y may have more than // one dimension in which case we want to slice them // returning a lower dimensional array ans[i] = ((xii-x(jmin))*y[jmax] + (x(jmax)-xii)*y[jmin]) / (x(jmax)-x(jmin)); } else if (xii-x(jmin) > x(jmax)-xii) { // Nearest neighbour is at next point ans[i] = y[jmax]; } else { // Nearest neighbour is at previous point ans[i] = y[jmin]; } } } else { // Reverse ordering for (Index i = 0; i < xi.size(); i++) { Real xii = xi(i); Index jmin = 0; Index jmax = x.size()-1; if (xii >= x(0)) { if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { // Extrapolate leftwards jmax = 1; } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { // Clamp at first value ans[i] = y[0]; continue; } else { ans[i] = extrap_value; continue; } } else if (xii <= x(jmax)) { if (extrap_policy == ADEPT_EXTRAPOLATE_LINEAR) { // Extrapolate rightwards jmin = jmax-1; } else if (extrap_policy == ADEPT_EXTRAPOLATE_CLAMP) { // Clamp at last value ans[i] = y[jmax]; continue; } else { ans[i] = extrap_value; continue; } } else { // xii lies within x // Find pair in which xi sits while (jmax > jmin+1) { Index jmid = jmin + (jmax-jmin)/2; if (xii < x(jmid)) { jmin = jmid; } else { jmax = jmid; } } } if (interp_scheme == ADEPT_INTERPOLATE_LINEAR) { // Found value: linearly interpolate (all weights here are // negative) ans[i] = ((xii-x(jmin))*y[jmax] + (x(jmax)-xii)*y[jmin]) / (x(jmax)-x(jmin)); } else if (xii-x(jmin) < x(jmax)-xii) { // Nearest neighbour is at next point ans[i] = y[jmax]; } else { // Nearest neighbour is at previous point ans[i] = y[jmin]; } } } return ans; } // Ensure that 1D interpolation works if expressions are provided // for any of the arguments; these are converted to temporary // arrays. 
template Array interp(const Expression& x, const Expression& y, const Expression& xi, internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT, YType extrap_value = std::numeric_limits::signaling_NaN()) { const Array<1,XType,false> x2(x.cast()); const Array y2(y.cast()); const Array<1,XiType,false> xi2(xi.cast()); return interp(x2, y2, xi2, options, extrap_value); } // 1D logarithmic interpolation: interpolate log(Y) and then // exponentiate the result. template Array<1,YType,YIsActive> log_interp(const Array<1,XType,false>& x, const Array<1,YType,YIsActive>& y, const Array<1,XiType,false>& xi) { using std::exp; using std::log; int length = xi.size(); Array<1,YType,YIsActive> ans(length); if (x.size() != y.size()) { throw(size_mismatch("Interpolation vectors must be the same length in log_interp")); } if (x(0) < x(1)) { // Normal ordering for (Index i = 0; i < length; i++) { Real xii = xi(i); Index jmin = 0; Index jmax = x.size()-1; if (xii <= x(0)) { // Extrapolate leftwards jmax = 1; } else if (xii >= x(jmax)) { // Extrapolate rightwards jmin = jmax-1; } else { // xii lies within x // Find pair in which xi sits while (jmax > jmin+1) { Index jmid = jmin + (jmax-jmin)/2; if (xii > x(jmid)) { jmin = jmid; } else { jmax = jmid; } } } // Found value: logarithmically interpolate if (y(jmax) > 0.0 && y(jmin) > 0.0) { YType log_y_jmax = log(y(jmax)); YType log_y_jmin = log(y(jmin)); ans(i) = exp(((xii-x(jmin))*log_y_jmax + (x(jmax)-xii)*log_y_jmin) / (x(jmax)-x(jmin))); } else { // Interpolate linearly since one or both values is zero ans(i) = ((xii-x(jmin))*y(jmax) + (x(jmax)-xii)*y(jmin)) / (x(jmax)-x(jmin)); } } } else { // Reverse ordering for (Index i = 0; i < length; i++) { Real xii = xi(i); Index jmin = 0; Index jmax = x.size()-1; if (xii >= x(0)) { // Extrapolate leftwards jmax = 1; } else if (xii <= x(jmax)) { // Extrapolate rightwards jmin = jmax-1; } else { // xii lies within x // Find pair in which xi sits while (jmax > jmin+1) { Index 
jmid = jmin + (jmax-jmin)/2; if (xii < x(jmid)) { jmin = jmid; } else { jmax = jmid; } } } // Found value: logarithmically interpolate if (y(jmax) > 0.0 && y(jmin) > 0.0) { YType log_y_jmax = log(y(jmax)); YType log_y_jmin = log(y(jmin)); ans(i) = exp(((xii-x(jmin))*log_y_jmax + (x(jmax)-xii)*log_y_jmin) / (x(jmax)-x(jmin))); } else { // Interpolate linearly since one or both values is zero ans(i) = ((xii-x(jmin))*y(jmax) + (x(jmax)-xii)*y(jmin)) / (x(jmax)-x(jmin)); } } } return ans; } // 2D interpolation: as 1D interpolation but with two vectors // describing the dimensions of the interpolation array and two // vectors providing points at which interpolated values are // required template Array interp2d(const XType& x, const YType& y, const Array& M, const Array<1,XiType,false>& xi, const Array<1,YiType,false>& yi, internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT, MType extrap_value = std::numeric_limits::signaling_NaN()) { ADEPT_STATIC_ASSERT(MDims >= 2, TWO_DIMENSIONAL_INTERPOLATION_REQUIRES_2D_ARRAY); if (x.size() != M.size(0)) { throw(size_mismatch("Interpolation vector x must have same length as first dimension of M in interp2d")); } if (y.size() != M.size(1)) { throw(size_mismatch("Interpolation vector y must have same length as second dimension of M in interp2d")); } else if (x.size() < 2 || y.size() < 2) { throw(size_mismatch("Interpolation array must have at least two elements in each direction in interp2d")); } else if (xi.dimensions() != yi.dimensions()) { throw(size_mismatch("Indexing arrays must be the same shape in interp2d")); } internal::uint interp_scheme, extrap_policy; internal::extract_interp_extrap(options, interp_scheme, extrap_policy); Index ni = xi.size(); ExpressionSize ans_dims; ans_dims[0] = xi.size(); for (int ii = 2; ii < MDims; ++ii) { ans_dims[ii-1] = M.size(ii); } Array ans(ans_dims); // Indices to the first of the two elements in each dimension, and // the weight of the first element IntVector 
xind0(ni); Vector xweight0(ni); IntVector yind0(ni); Vector yweight0(ni); boolVector is_valid(ni); is_valid = true; internal::InterpHelper::interp_get_indices_weights(x, xi, interp_scheme, extrap_policy, xind0, xweight0, is_valid); internal::InterpHelper::interp_get_indices_weights(y, yi, interp_scheme, extrap_policy, yind0, yweight0, is_valid); /* std::cout << "xind0 " << xind0 << "\n"; std::cout << "xweight00 " << xweight0 << "\n"; std::cout << "yind0 " << yind0 << "\n"; std::cout << "yweight00 " << yweight0 << "\n"; */ for (Index ii = 0; ii < ni; ++ii) { if (is_valid(ii)) { // Bi-linear interpolation ans[ii] = yweight0(ii) * ( xweight0(ii) * M[xind0(ii)][yind0(ii)] +(1.0-xweight0(ii)) * M[xind0(ii)+1][yind0(ii)]) + (1.0-yweight0(ii)) * ( xweight0(ii) * M[xind0(ii)][yind0(ii)+1] +(1.0-xweight0(ii)) * M[xind0(ii)+1][yind0(ii)+1]); } else { ans[ii] = extrap_value; } } return ans; } // Ensure that 2D interpolation works if expressions are provided // for any of the arguments; these are converted to temporary // arrays. 
template Array interp2d(const Expression& x, const Expression& y, const Expression& m, const Expression& xi, const Expression& yi, internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT, MType extrap_value = std::numeric_limits::signaling_NaN()) { const Array<1,XType,false> x2(x.cast()); const Array<1,YType,false> y2(y.cast()); const Array m2(m.cast()); const Array<1,XiType,false> xi2(xi.cast()); const Array<1,YiType,false> yi2(yi.cast()); return interp2d(x2, y2, m2, xi2, yi2, options, extrap_value); } // 3D interpolation: as 1D interpolation but with two vectors // describing the dimensions of the interpolation array and two // vectors providing points at which interpolated values are // required template Array interp3d(const XType& x, const YType& y, const ZType& z, const Array& M, const Array<1,XiType,false>& xi, const Array<1,YiType,false>& yi, const Array<1,ZiType,false>& zi, internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT, MType extrap_value = std::numeric_limits::signaling_NaN()) { ADEPT_STATIC_ASSERT(MDims >= 3, THREE_DIMENSIONAL_INTERPOLATION_REQUIRES_3D_ARRAY); if (x.size() != M.size(0)) { throw(size_mismatch("Interpolation vector x must have same length as first dimension of M in interp3d")); } if (y.size() != M.size(1)) { throw(size_mismatch("Interpolation vector y must have same length as second dimension of M in interp3d")); } if (z.size() != M.size(2)) { throw(size_mismatch("Interpolation vector z must have same length as third dimension of M in interp3d")); } else if (x.size() < 2 || y.size() < 2 || z.size() < 2) { throw(size_mismatch("Interpolation array must have at least two elements in each direction in interp3d")); } else if (xi.dimensions() != yi.dimensions() || xi.dimensions() != zi.dimensions()) { throw(size_mismatch("Indexing arrays must be the same shape in interp3d")); } internal::uint interp_scheme, extrap_policy; internal::extract_interp_extrap(options, interp_scheme, extrap_policy); 
Index ni = xi.size(); ExpressionSize ans_dims; ans_dims[0] = xi.size(); for (int ii = 3; ii < MDims; ++ii) { ans_dims[ii-2] = M.size(ii); } Array ans(ans_dims); // Indices to the first of the two elements in each dimension, and // the weight of the first element IntVector xind0(ni); Vector xweight0(ni); IntVector yind0(ni); Vector yweight0(ni); IntVector zind0(ni); Vector zweight0(ni); boolVector is_valid(ni); is_valid = true; internal::InterpHelper::interp_get_indices_weights(x, xi, interp_scheme, extrap_policy, xind0, xweight0, is_valid); internal::InterpHelper::interp_get_indices_weights(y, yi, interp_scheme, extrap_policy, yind0, yweight0, is_valid); internal::InterpHelper::interp_get_indices_weights(z, zi, interp_scheme, extrap_policy, zind0, zweight0, is_valid); for (Index ii = 0; ii < ni; ++ii) { if (is_valid(ii)) { // Tri-linear interpolation ans[ii] = xweight0(ii) * (yweight0(ii) * (zweight0(ii) * M[xind0(ii)][yind0(ii)][zind0(ii)] +(1.0-zweight0(ii)) * M[xind0(ii)][yind0(ii)][zind0(ii)+1]) + (1.0-yweight0(ii)) * (zweight0(ii) * M[xind0(ii)][yind0(ii)+1][zind0(ii)] +(1.0-zweight0(ii)) * M[xind0(ii)][yind0(ii)+1][zind0(ii)+1])) + (1.0 - xweight0(ii)) * (yweight0(ii) * (zweight0(ii) * M[xind0(ii)+1][yind0(ii)][zind0(ii)] +(1.0-zweight0(ii)) * M[xind0(ii)+1][yind0(ii)][zind0(ii)+1]) + (1.0-yweight0(ii)) * (zweight0(ii) * M[xind0(ii)+1][yind0(ii)+1][zind0(ii)] +(1.0-zweight0(ii)) * M[xind0(ii)+1][yind0(ii)+1][zind0(ii)+1])); } else { ans[ii] = extrap_value; } } return ans; } // Ensure that 3D interpolation works if expressions are provided // for any of the arguments; these are converted to temporary // arrays. 
// Summary of the logical units held on the following physical line.
// NOTE(review): line structure, the targets of the #include
// directives and several template argument lists appear to have been
// lost in extraction -- confirm against the upstream headers.
//  - interp3d, Expression overload: copies x, y, z, m, xi, yi and zi
//    into temporary arrays via cast() and forwards them, together
//    with options and extrap_value, to interp3d.
//  - End of namespace adept and of the include guard of the
//    interpolation header.
//  - inv.h: declares inv() for a general square matrix and for a
//    symmetric matrix, and defines an overload for an arbitrary
//    expression that first copies it into an Array<2,Type,false> and
//    then calls inv() on that array.
//  - The dump separator that introduces matmul.h.
template Array interp3d(const Expression& x, const Expression& y, const Expression& z, const Expression& m, const Expression& xi, const Expression& yi, const Expression& zi, internal::uint options = ADEPT_INTERPOLATE_LINEAR | ADEPT_EXTRAPOLATE_DEFAULT, MType extrap_value = std::numeric_limits::signaling_NaN()) { const Array<1,XType,false> x2(x.cast()); const Array<1,YType,false> y2(y.cast()); const Array<1,ZType,false> z2(z.cast()); const Array m2(m.cast()); const Array<1,XiType,false> xi2(xi.cast()); const Array<1,YiType,false> yi2(yi.cast()); const Array<1,ZiType,false> zi2(zi.cast()); return interp3d(x2, y2, z2, m2, xi2, yi2, zi2, options, extrap_value); } } // End namespace adept #endif ================================================ FILE: include/adept/inv.h ================================================ /* inv.h -- Invert matrices Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptInv_H #define AdeptInv_H 1 #include #include #include namespace adept { // ------------------------------------------------------------------- // Invert general square matrix A // ------------------------------------------------------------------- template Array<2,Type,false> inv(const Array<2,Type,false>& A); // ------------------------------------------------------------------- // Invert symmetric matrix A // ------------------------------------------------------------------- template SpecialMatrix,false> inv(const SpecialMatrix,false>& A); // ------------------------------------------------------------------- // Invert arbitrary expression // ------------------------------------------------------------------- template typename internal::enable_if::value, Array<2,Type,false> >::type inv(const Expression& A) { Array<2,Type,false> array = A.cast(); return inv(array); } } #endif ================================================ FILE: include/adept/matmul.h 
// Summary of the logical units held on the following physical lines
// (the internals of matmul.h).
// NOTE(review): line structure, the targets of the #include
// directives and most template argument lists appear to have been
// lost in extraction -- confirm against the upstream header.
//  - check_inner_dimensions / check_inner_dimensions_sqr: throw
//    empty_array if either operand is empty and
//    inner_dimension_mismatch if the inner dimensions differ.
//  - matmul_ (dense matrix-vector): if the matrix is contiguous in
//    neither rows nor columns it is copied and the function calls
//    itself on the copy; otherwise cppblas_gemv is called and, for
//    active arguments, one derivative statement is pushed per
//    element of the result.
//  - matmul_ (dense matrix-matrix): copies whichever operand is
//    contiguous in neither direction, then calls cppblas_gemm and
//    pushes one derivative statement per element of the result.
//  - matmul_symmetric (vector and matrix right-hand sides): throw
//    invalid_operation if either argument is active, otherwise call
//    cppblas_symv / cppblas_symm.
//  - matmul_band (vector right-hand side): throws invalid_operation
//    for an active band matrix, calls cppblas_gbmv, and records
//    derivatives with respect to the vector when it is active.
//    NOTE(review): "Index j_start = ileft_dim ? ..." looks truncated;
//    presumably a comparison and the declaration of j_end_plus_1
//    were lost between a '<' and a '>' -- restore from upstream.
//    NOTE(review): unlike the dense versions, the recording here is
//    not guarded by is_recording() under ADEPT_RECORDING_PAUSABLE,
//    and "right_index + j_start" is not scaled by right.offset(0)
//    -- confirm both are intended.
//  - matmul_band (matrix right-hand side): throws invalid_operation
//    if either argument is active, then calls cppblas_gbmv once per
//    column of the right-hand matrix.
//  - matmul_ overloads for vector-matrix, symmetric and band
//    operands, expressed via the functions above by swapping and
//    transposing arguments as described in their comments.
//  - promote_array overloads for expressions and dense arrays, and
//    the opening of the
//    ADEPT_ONLY_DIFFERENTIATE_DENSE_MATRIX_MULTIPLICATION branch.
================================================ /* matmul.h -- Matrix multiplication capability Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptMatmul_H #define AdeptMatmul_H #include #include #include #include namespace adept { namespace internal { // --------------------------------------------------------------------- // Helper functions for checking dimensions // --------------------------------------------------------------------- template inline void check_inner_dimensions(const L& left, const R& right) { if (left.empty() || right.empty()) { throw empty_array("Attempt to perform matrix multiplication with empty array(s)" ADEPT_EXCEPTION_LOCATION); } if (left.dimension(1) != right.dimension(0)) { throw inner_dimension_mismatch("Inner dimension mismatch in array multiplication" ADEPT_EXCEPTION_LOCATION); } } template inline void check_inner_dimensions_sqr(Index left_dim, const R& right) { if (left_dim == 0 || right.empty()) { throw empty_array("Attempt to perform matrix multiplication with empty array(s)" ADEPT_EXCEPTION_LOCATION); } if (left_dim != right.dimension(0)) { throw inner_dimension_mismatch("Inner dimension mismatch in array multiplication" ADEPT_EXCEPTION_LOCATION); } } // --------------------------------------------------------------------- // Underlying functions // --------------------------------------------------------------------- // Dense matrix-vector multiplication template inline Array<1,T,(LIsActive||RIsActive)> matmul_(const Array<2,T,LIsActive>& left, const Array<1,T,RIsActive>& right) { static const bool is_active = LIsActive || RIsActive; check_inner_dimensions(left, right); Array<1,T,is_active> ans(left.dimension(0)); Index stride; BLAS_ORDER order; if (!left.is_row_contiguous() && !left.is_column_contiguous()) { // Matrix is strided in both directions so needs to be copied // first Array<2,T,LIsActive> left_; left_ = left; return 
matmul_(left_, right); } else if (left.is_row_contiguous()) { order = BlasRowMajor; stride = left.offset(0); } else { order = BlasColMajor; stride = left.offset(1); } cppblas_gemv(order, BlasNoTrans, left.dimension(0), left.dimension(1), 1.0, left.const_data(), stride, right.const_data(), right.offset(0), 0.0, ans.data(), ans.offset(0)); if (is_active #ifdef ADEPT_RECORDING_PAUSABLE && ADEPT_ACTIVE_STACK->is_recording() #endif ) { uIndex left_index = left.gradient_index(); uIndex right_index = right.gradient_index(); uIndex ans_index = ans.gradient_index(); Index n = right.dimension(0); const ExpressionSize<2>& left_offset = left.offset(); const ExpressionSize<1>& right_offset = right.offset(); for (Index i = 0; i < ans.dimension(0); ++i) { if (LIsActive) { active_stack()->push_derivative_dependence(left_index+i*left_offset[0], right.const_data(), n, left_offset[1], right_offset[0]); } if (RIsActive) { active_stack()->push_derivative_dependence(right_index, left.const_data()+i*left_offset[0], n, right_offset[0], left_offset[1]); } active_stack()->push_lhs(ans_index + i*ans.offset(0)); } } return ans; } // Dense matrix-matrix multiplication template inline Array<2,T,(LIsActive||RIsActive)> matmul_(const Array<2,T,LIsActive>& left, const Array<2,T,RIsActive>& right) { static const bool is_active = LIsActive || RIsActive; check_inner_dimensions(left, right); if (!left.is_row_contiguous() && !left.is_column_contiguous()) { Array<2,T,LIsActive> left_; left_ = left; if (!right.is_row_contiguous() && !right.is_column_contiguous()) { Array<2,T,RIsActive> right_; right_ = right; return matmul_(left_, right_); } else { return matmul_(left_, right); } } else if (!right.is_row_contiguous() && !right.is_column_contiguous()) { Array<2,T,RIsActive> right_; right_ = right; return matmul_(left, right_); } else { Index left_stride, right_stride, ans_stride; BLAS_TRANSPOSE left_trans, right_trans; BLAS_ORDER order; Array<2,T,is_active> ans(left.dimension(0),right.dimension(1)); if 
(ans.is_row_contiguous()) { order = BlasRowMajor; ans_stride = ans.offset(0); } else { order = BlasColMajor; ans_stride = ans.offset(1); } if (left.is_row_contiguous()) { left_trans = order == BlasRowMajor ? BlasNoTrans : BlasTrans; left_stride = left.offset(0); } else { left_trans = order == BlasColMajor ? BlasNoTrans : BlasTrans; left_stride = left.offset(1); } if (right.is_row_contiguous()) { right_trans = order == BlasRowMajor ? BlasNoTrans : BlasTrans; right_stride = right.offset(0); } else { right_trans = order == BlasColMajor ? BlasNoTrans : BlasTrans; right_stride = right.offset(1); } cppblas_gemm(order, left_trans, right_trans, left.dimension(0), right.dimension(1), left.dimension(1), 1.0, left.const_data(), left_stride, right.const_data(), right_stride, 0.0, ans.data(), ans_stride); if ( (LIsActive || RIsActive) #ifdef ADEPT_RECORDING_PAUSABLE && ADEPT_ACTIVE_STACK->is_recording() #endif ) { uIndex left_index = left.gradient_index(); uIndex right_index = right.gradient_index(); uIndex ans_index = ans.gradient_index(); Index n = right.dimension(0); const ExpressionSize<2>& left_offset = left.offset(); const ExpressionSize<2>& right_offset = right.offset(); for (Index i = 0; i < ans.dimension(0); ++i) { for (Index j = 0; j < ans.dimension(1); ++j) { if (LIsActive) { active_stack()->push_derivative_dependence(left_index+i*left_offset[0], right.const_data()+j*right_offset[1], n, left_offset[1], right_offset[0]); } if (RIsActive) { active_stack()->push_derivative_dependence(right_index+j*right_offset[1], left.const_data()+i*left_offset[0], n, right_offset[0], left_offset[1]); } active_stack()->push_lhs(ans_index + i*ans.offset(0) + j*ans.offset(1)); } } } return ans; } } // Symmetric matrix-vector multiplication template inline Array<1,T,(LIsActive||RIsActive)> matmul_symmetric(const T* left_ptr, SymmMatrixOrientation left_orient, Index left_dim, Index left_offset, uIndex left_gradient_index, const Array<1,T,RIsActive>& right) { 
check_inner_dimensions_sqr(left_dim, right); if (LIsActive || RIsActive) { throw(invalid_operation("Cannot yet do matmul(SymmMatrix,Vector) when either are active")); } BLAS_UPLO uplo; if (left_orient == ROW_LOWER_COL_UPPER) { uplo = BlasLower; } else { uplo = BlasUpper; } Array<1,T,LIsActive||RIsActive> ans(right.dimension(0)); cppblas_symv(BlasRowMajor, uplo, right.dimension(0), 1.0, left_ptr, left_offset, right.const_data(), right.offset(0), 0.0, ans.data(), ans.offset(0)); return ans; } // Symmetric matrix-matrix multiplication template inline Array<2,T,(LIsActive||RIsActive)> matmul_symmetric(const T* left_ptr, SymmMatrixOrientation left_orient, Index left_dim, Index left_offset, uIndex left_gradient_index, const Array<2,T,RIsActive>& right) { check_inner_dimensions_sqr(left_dim, right); if (LIsActive || RIsActive) { throw(invalid_operation("Cannot yet do matmul(SymmMatrix,Matrix) when either are active")); } if (!right.is_row_contiguous() && !right.is_column_contiguous()) { Array<2,T,RIsActive> right_; right_ = right; return matmul_symmetric(left_ptr, left_orient, left_dim, left_offset, left_gradient_index, right_); } else { BLAS_ORDER order; BLAS_UPLO uplo; Index right_stride, ans_stride; Array<2,T,LIsActive||RIsActive> ans; if (right.is_row_contiguous()) { order = BlasRowMajor; uplo = left_orient == ROW_LOWER_COL_UPPER ? BlasLower : BlasUpper; right_stride = right.offset(0); ans.resize_row_major(right.dimensions()); ans_stride = ans.offset(0); } else { order = BlasColMajor; uplo = left_orient == ROW_LOWER_COL_UPPER ? 
BlasUpper : BlasLower; right_stride = right.offset(1); ans.resize_column_major(right.dimensions()); ans_stride = ans.offset(1); } cppblas_symm(order, BlasLeft, uplo, right.dimension(0), right.dimension(1), 1.0, left_ptr, left_offset, right.const_data(), right_stride, 0.0, ans.data(), ans_stride); return ans; } } // Band matrix-vector multiplication template inline Array<1,T,(LIsActive||RIsActive)> matmul_band(const T* left_ptr, MatrixStorageOrder left_order, Index LDiags, Index UDiags, Index left_dim, Index left_offset, uIndex left_gradient_index, const Array<1,T,RIsActive>& right) { check_inner_dimensions_sqr(left_dim, right); if (LIsActive) { throw(invalid_operation("Cannot yet do matmul(BandMatrix,Vector) for active BandMatrix")); } BLAS_ORDER order; // BLAS declares the start pointer to be in the "missing data" // zone, so we need to subtract from the address of the top-left // corner of the matrix const T* left_start; if (left_order == ROW_MAJOR) { order = BlasRowMajor; left_start = left_ptr-UDiags; } else { order = BlasColMajor; left_start = left_ptr-LDiags; } Array<1,T,(LIsActive||RIsActive)> ans(right.dimension(0)); cppblas_gbmv(order, BlasNoTrans, left_dim, left_dim, LDiags, UDiags, 1.0, left_start, left_offset+1, right.const_data(), right.offset(0), 0.0, ans.data(), ans.offset(0)); if (RIsActive) { uIndex right_index = right.gradient_index(); uIndex ans_index = ans.gradient_index(); if (left_order == ROW_MAJOR) { for (Index i = 0; i < ans.dimension(0); ++i) { // Using info from BandEngine::get_row_range in // SpecialMatrix.h Index j_start = ileft_dim ? 
left_dim : i+UDiags+1; Index n = j_end_plus_1 - j_start; Index index_start = i*left_offset + j_start; Index index_stride = 1; active_stack()->push_derivative_dependence(right_index + j_start, left_ptr+index_start, n, right.offset(0), index_stride); active_stack()->push_lhs(ans_index + i*ans.offset(0)); } } else { for (Index i = 0; i < ans.dimension(0); ++i) { // Using info from BandEngine::get_row_range in // SpecialMatrix.h Index j_start = ileft_dim ? left_dim : i+UDiags+1; Index n = j_end_plus_1 - j_start; Index index_start = i + j_start*left_offset; Index index_stride = left_offset; active_stack()->push_derivative_dependence(right_index + j_start, left_ptr+index_start, n, right.offset(0), index_stride); active_stack()->push_lhs(ans_index + i*ans.offset(0)); } } } return ans; } // Matrix-matrix multiplication with a band matrix on the left, // achieved by repeated matrix-vector multiplications template inline Array<2,T,(LIsActive||RIsActive)> matmul_band(const T* left_ptr, MatrixStorageOrder left_order, Index LDiags, Index UDiags, Index left_dim, Index left_offset, uIndex left_gradient_index, const Array<2,T,RIsActive>& right) { check_inner_dimensions_sqr(left_dim, right); if (LIsActive || RIsActive) { throw(invalid_operation("Cannot yet do matmul(BandMatrix,Matrix) when either are active")); } BLAS_ORDER order; // BLAS declares the start pointer to be in the "missing data" // zone, so we need to subtract from the address of the top-left // corner of the matrix const T* left_start; if (left_order == ROW_MAJOR) { order = BlasRowMajor; left_start = left_ptr-UDiags; } else { order = BlasColMajor; left_start = left_ptr-LDiags; } Array<2,T,(LIsActive||RIsActive)> ans(right.dimension(0),right.dimension(1)); for (Index i = 0; i < right.dimension(1); ++i) { cppblas_gbmv(order, BlasNoTrans, left_dim, left_dim, LDiags, UDiags, 1.0, left_start, left_offset+1, right.const_data()+i*right.offset(1), right.offset(0), 0.0, ans.data()+i*ans.offset(1), ans.offset(0)); } return 
ans; } // --------------------------------------------------------------------- // Versions of matmul_ implemented in terms of the underlying functions // --------------------------------------------------------------------- // Dense vector-matrix multiplication is evaluated by swapping and // transposing the arguments template inline Array<1,T,(LIsActive||RIsActive)> matmul_(const Array<1,T,LIsActive>& left, const Array<2,T,RIsActive>& right) { return matmul_(right.T(), left); } // Symmetric matrix-vector and matrix-matrix multiplication template inline Array matmul_(const SpecialMatrix,LIsActive>& left, const Array& right) { return matmul_symmetric(left.const_data(), LOrient, left.dimension(0), left.offset(), left.gradient_index(), right); } // Vector multiplied by symmetric matrix: swap and transpose the arguments template inline Array<1,T,(LIsActive||RIsActive)> matmul_(const Array<1,T,LIsActive>& left, const SpecialMatrix,RIsActive>& right) { return matmul_symmetric(right.const_data(), ROrient, right.dimension(0), right.offset(), right.gradient_index(), left); } // Dense matrix multiplied by symmetric matrix: swap and transpose // the arguments, then transpose the result template inline Array<2,T,(LIsActive||RIsActive)> matmul_(const Array<2,T,LIsActive>& left, const SpecialMatrix,RIsActive>& right) { return matmul_symmetric(right.const_data(), ROrient, right.dimension(0), right.offset(), right.gradient_index(), left.T()).T(); } // Band matrix-vector and matrix-matrix multiplication template inline Array matmul_(const SpecialMatrix,LIsActive>& left, const Array& right) { return matmul_band(left.const_data(), LOrder, LDiags, UDiags, left.dimension(0), left.offset(), left.gradient_index(), right); } // Vector multiplied by band matrix: swap and transpose the arguments template inline Array<1,T,(LIsActive||RIsActive)> matmul_(const Array<1,T,LIsActive>& left, const SpecialMatrix,RIsActive>& right) { static const MatrixStorageOrder new_r_order = ROrder == 
ROW_MAJOR ? COL_MAJOR : ROW_MAJOR; return matmul_band(right.const_data(), new_r_order, UDiags, LDiags, right.dimension(0), right.offset(), right.gradient_index(), left); } // Dense matrix multiplied by band matrix: swap and transpose the // arguments, then transpose the result template inline Array<2,T,(LIsActive||RIsActive)> matmul_(const Array<2,T,LIsActive>& left, const SpecialMatrix,RIsActive>& right) { static const MatrixStorageOrder new_r_order = ROrder == ROW_MAJOR ? COL_MAJOR : ROW_MAJOR; return matmul_band(right.const_data(), new_r_order, UDiags, LDiags, right.dimension(0), right.offset(), right.gradient_index(), left.T()).T(); } // --------------------------------------------------------------------- // promote_array: helper function to change type of array and // convert expressions to arrays // --------------------------------------------------------------------- // If the argument is not an l-value then convert it to a dense // array of the same rank template inline typename internal::enable_if >::type promote_array(const Expression& arg) { return Array(arg); } // If the argument is a dense array then convert it to the new // type; this will only involve a copy of the raw data if the type // is changed, otherwise the new array will simply link to the old // one template inline Array promote_array(const Array& arg) { return Array(const_cast&>(arg)); } #ifdef ADEPT_ONLY_DIFFERENTIATE_DENSE_MATRIX_MULTIPLICATION // If the argument is an active special matrix then it must be // copied to a dense "Array" because differentiation of the // various types of special matrix (symmetric, band, upper, lower // etc) is not yet implemented. 
// Summary of the logical units held on the following physical line:
// the promote_array overloads for special matrices that are compiled
// when ADEPT_ONLY_DIFFERENTIATE_DENSE_MATRIX_MULTIPLICATION is
// defined.  Each one constructs its return object from a const_cast
// of the argument: the first returns a dense Array<2,NewType,true>,
// the next two return a SpecialMatrix (for the inactive symmetric
// and band cases, per the comment that precedes them), and the last
// returns a dense Array<2,NewType,false>.  The line ends with the
// #else that opens the alternative branch.
// NOTE(review): the template argument lists (including those that
// distinguish the two SpecialMatrix overloads) appear to have been
// lost in extraction -- confirm against the upstream header.
template inline Array<2,NewType,true> promote_array(const SpecialMatrix& arg) { return Array<2,NewType,true>( const_cast&>(arg)); } // If the argument is an inactive symmetric or band matrix then // convert the element type; this will only involve a copy of the // raw data if the type is changed, otherwise the new array will // simply link to the old template inline SpecialMatrix,false> promote_array(const SpecialMatrix,false>& arg) { return SpecialMatrix,false>( const_cast,false>&>(arg)); } template inline SpecialMatrix,false> promote_array(const SpecialMatrix,false>& arg) { return SpecialMatrix,false>( const_cast,false>&>(arg)); } // For other special matrices (square and triangular), specific // matrix multiplication functions have not yet been added, so we // have to convert to a dense array first template inline Array<2,NewType,false> promote_array(const SpecialMatrix& arg) { return Array<2,NewType,false>( const_cast&>(arg)); } #else // The following assumes that the Adept library knows how to // differentiate special matrices: currently it doesn't so this // path is likely to throw a run-time exception. 
// Summary of the logical units held on the following physical lines.
// NOTE(review): line structure and most template argument lists
// appear to have been lost in extraction -- confirm against the
// upstream header.
//  - promote_array for special matrices (the #else branch): returns
//    a SpecialMatrix constructed from a const_cast of the argument.
//  - promote_array for FixedArray: one overload constructs an Array
//    from the argument, the other builds an Array from the
//    argument's data pointer, dimensions, offset and gradient index
//    (copy versus link, per the comments that precede them).
//  - matmul: promotes both operands with internal::promote_array
//    and passes them to internal::matmul_.
//  - internal::MatmulRHS: holds a const reference to its operand in
//    the member "array"; its static members are described by the
//    in-line comment as unused apart from enabling expr_cast.
//    NOTE(review): n_active is declared "static const bool" here but
//    is initialised with 0 -- confirm that bool is intended.
//  - Unary operator*: wraps its operand in a MatmulRHS.
//  - Binary operator* with a MatmulRHS on the right: calls matmul
//    on the left operand and the wrapped array.
//  - End of matmul.h and the dump separator that introduces
//    noalias.h.
template inline SpecialMatrix promote_array(const SpecialMatrix& arg) { return SpecialMatrix( const_cast&>(arg)); } #endif // If the argument is a fixed array of a different type then copy it template inline typename enable_if::value, Array::rank, NewType,IsActive> >::type promote_array(const FixedArray& arg) { return Array::rank, NewType,IsActive>(const_cast&>(arg)); } // If the argument is a fixed array of the same type then link to it template inline typename enable_if::value, Array::rank, NewType,IsActive> >::type promote_array(const FixedArray& arg) { return Array::rank,NewType,IsActive> (const_cast&>(arg).data(), 0, arg.dimensions(), arg.offset(), arg.gradient_index()); } } // End namespace internal // --------------------------------------------------------------------- // matmul function: replicates Fortran-90 equivalent // --------------------------------------------------------------------- // If either argument is not an lvalue (i.e. is an array expression // rather than an array) then convert it into a dense array template inline typename internal::enable_if<(L::rank == 1 || L::rank == 2) && (R::rank == 1 || R::rank == 2) && (L::rank+R::rank > 2), Array::type, L::is_active||R::is_active> >::type matmul(const Expression& left, const Expression& right) { typedef typename internal::promote::type type; return internal::matmul_(internal::promote_array(left.cast()), internal::promote_array(right.cast())); } // --------------------------------------------------------------------- // Implement "**" pseudo-operator for matrix multiplication // --------------------------------------------------------------------- // In order for A**B to lead to matrix multiplication, *B will // return a MatmulRHS object, and A*[a MatmulRHS object] will send // the two arguments to the matmul function namespace internal { // The MatmulRHS class simply contains a reference to an array template struct MatmulRHS { // The following are not used but enable // expr_cast::... 
to work static const int rank = A::rank; static const bool is_active = A::is_active; static const int n_arrays = 0; static const bool n_active = 0; static const bool is_lvalue = false; static const bool is_vectorizable = false; static const int n_scratch = 0; // The following are necessary in order that other binary // operator* functions can compile, even if they are rejected // for a particular multiplication typedef typename A::type type; typedef bool _adept_expression_flag; // Constructor simply saves a reference to the expression // argument MatmulRHS(const A& a) : array(a) { } const A& array; }; } // Dereference operator returns a MatmulRHS object template inline typename internal::enable_if<(A::rank == 1 || A::rank == 2), internal::MatmulRHS >::type operator*(const Expression& a) { return internal::MatmulRHS(a.cast()); } // Multiply operator with a MatmulRHS object on the right-hand-side // will call the matmul function template inline Array::type, (L::is_active||R::is_active)> operator*(const Expression& left, const internal::MatmulRHS& right) { return matmul(left.cast(),right.array.cast()); } } // End namespace adept #endif ================================================ FILE: include/adept/noalias.h ================================================ /* noalias.h -- Wrap an expression so that alias checking is not performed Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
// Summary of the logical units held on the following physical lines.
// NOTE(review): line structure, the targets of the #include
// directives and most template argument lists appear to have been
// lost in extraction -- confirm against the upstream headers.
//  - internal::NoAlias: expression wrapper holding a const reference
//    "arg" to the wrapped expression.  Its static members copy those
//    of the wrapped type, is_aliased_ always returns false, and the
//    remaining member functions forward to the corresponding member
//    of arg (value_stored_ returns scratch[MyScratchNum]).
//    NOTE(review): value_with_len_ calls "operation(...)", which is
//    not declared anywhere in this struct as shown; it may be left
//    over from another expression class -- confirm against upstream.
//  - noalias: the Expression overload returns a NoAlias built from
//    r.cast(); the other overload returns its argument unchanged.
//  - The dump separator that introduces outer_product.h and the
//    beginning of internal::OuterProduct, whose definition continues
//    beyond these lines.
*/ #ifndef AdeptNoalias_H #define AdeptNoalias_H #include namespace adept { namespace internal { // No-alias wrapper for enabling noalias() template struct NoAlias : public Expression > { static const int rank = R::rank; static const bool is_active = R::is_active; static const int n_active = R::n_active; static const int n_scratch = R::n_scratch; static const int n_arrays = R::n_arrays; static const bool is_vectorizable = R::is_vectorizable; const R& arg; NoAlias(const Expression& arg_) : arg(arg_.cast()) { } template bool get_dimensions_(ExpressionSize& dim) const { return arg.get_dimensions(dim); } // Index get_dimension_with_len(Index len) const { // return arg.get_dimension_with_len_(len); // } std::string expression_string_() const { std::string str = "noalias("; str += static_cast(&arg)->expression_string() + ")"; return str; } bool is_aliased_(const Type* mem1, const Type* mem2) const { return false; } bool all_arrays_contiguous_() const { return arg.all_arrays_contiguous_(); } bool is_aligned_() const { return arg.is_aligned_(); } template int alignment_offset_() const { return arg.template alignment_offset_(); } template Type value_with_len_(Index i, Index len) const { return operation(arg.value_with_len(i, len)); } template void advance_location_(ExpressionSize& loc) const { arg.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return arg.template value_at_location_(loc); } template Packet packet_at_location_(const ExpressionSize& loc) const { return arg.template packet_at_location_(loc); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return arg.template value_at_location_store_(loc, scratch); } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return scratch[MyScratchNum]; } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { arg.template 
calc_gradient_(stack, loc, scratch); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { arg.template calc_gradient_(stack, loc, scratch, multiplier); } template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { arg.template set_location_(i, index); } }; // End struct NoAlias } template inline adept::internal::NoAlias noalias(const Expression& r) { return adept::internal::NoAlias(r.cast()); } template inline typename internal::enable_if::value, Type>::type noalias(const Type& r) { return r; } } #endif ================================================ FILE: include/adept/outer_product.h ================================================ /* outer_product.h -- Compute the outer product of two vectors Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptOuterProduct_H #define AdeptOuterProduct_H #include #include namespace adept { namespace internal { // Expression representing an outer product template class OuterProduct : public Expression > { typedef Array<1,LType,L::is_active> LArray; typedef Array<1,RType,R::is_active> RArray; public: // Static data static const int rank = 2; static const bool is_active = L::is_active || R::is_active; static const int store_result = is_active; static const int n_active = LArray::n_active + RArray::n_active; static const int n_local_scratch = store_result; static const int n_scratch = n_local_scratch + LArray::n_scratch + RArray::n_scratch; static const int n_arrays = LArray::n_arrays + RArray::n_arrays; // Currently not vectorizable because the current design always // has the array index increasing // static const bool is_vectorizable = is_same::value; static const bool is_vectorizable = false;//is_same::value; protected: // DATA: need to store actual arrays to avoid temporaries going // out of scope before they're used; note that if an 
array is // passed in then a shallow copy is made. const LArray left; const RArray right; public: OuterProduct(const Expression& left_, const Expression& right_) : left(left_.cast()), right(right_.cast()) { } bool get_dimensions_(ExpressionSize<2>& dim) const { dim[0] = left.size(); dim[1] = right.size(); return dim[0] > 0 && dim[1] > 0; } std::string expression_string_() const { return "outer_product(" + left.expression_string() + "," + right.expression_string() + ")"; } bool is_aliased_(const Type* mem1, const Type* mem2) const { return false; } bool all_arrays_contiguous_() const { return right.all_arrays_contiguous_(); } bool is_aligned_() const { return right.is_aligned_(); } template int alignment_offset_() const { return right.template alignment_offset_(); } // Do not implement value_with_len_ // Advance the row only, so the left vector is not advanced template void advance_location_(ExpressionSize& loc) const { right.template advance_location_(loc); } template Type value_at_location_(const ExpressionSize& loc) const { return left.template value_at_location_(loc) * right.template value_at_location_(loc); } // This does not work because the array index is always // increased which it shouldn't be for the left vector. For this // reason, vectorization is turned off (see is_vectorizable // above) template Packet packet_at_location_(const ExpressionSize& loc) const { // The LHS of the following multiplication returns a packet // containing repeated values of the left vector at one // location return Packet(left.template value_at_location_(loc)) // <- fix! 
* right.template packet_at_location_(loc); } template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return scratch[MyScratchNum] = left.template value_at_location_store_(loc, scratch) * right.template value_at_location_store_(loc, scratch); } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return scratch[MyScratchNum]; } template void set_location_(const ExpressionSize<2>& i, ExpressionSize& index) const { left.template set_location_(ExpressionSize<1>(i[0]), index); right.template set_location_(ExpressionSize<1>(i[1]), index); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { calc_left_ (stack, left, loc, scratch); calc_right_(stack, right, loc, scratch); } // As the previous but multiplying the gradient by "multiplier" template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { calc_left_ (stack, left, loc, scratch, multiplier); calc_right_(stack, right, loc, scratch, multiplier); } protected: // Only calculate gradients for left and right arguments if they // are active; otherwise do nothing template typename enable_if::type calc_left_(Stack& stack, const MyLType& left, const ExpressionSize& loc, const ScratchVector& scratch) const { Multiply::template calc_left(stack, left, right, loc, scratch); } template typename enable_if::type calc_left_(Stack& stack, const MyLType& left, const ExpressionSize& loc, const ScratchVector& scratch) const { } template typename enable_if::type calc_right_(Stack& stack, const MyRType& right, const ExpressionSize& loc, const ScratchVector& scratch) const { Multiply::template calc_right(stack, left, right, loc, scratch); } template typename enable_if::type calc_right_(Stack& stack, const MyRType& right, const ExpressionSize& loc, const ScratchVector& scratch) const { } template typename enable_if::type calc_left_(Stack& 
stack, const MyLType& left, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { Multiply::template calc_left(stack, left, right, loc, scratch, multiplier); } template typename enable_if::type calc_left_(Stack& stack, const MyLType& left, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } template typename enable_if::type calc_right_(Stack& stack, const MyRType& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { Multiply::template calc_right(stack, left, right, loc, scratch, multiplier); } template typename enable_if::type calc_right_(Stack& stack, const MyRType& right, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { } }; } // Define outer_product function template internal::OuterProduct::type,LType,L,RType,R> outer_product(const Expression& l, const Expression& r) { return internal::OuterProduct::type, LType,L,RType,R>(l,r); } } #endif ================================================ FILE: include/adept/quick_e.h ================================================ /* quick_e.h -- Fast exponential function for Intel and ARM intrinsics Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library, although can be used stand-alone. The exponential function for real arguments is used in many areas of physics, yet is not vectorized by many compilers. This C++ header file provides a fast exponential function (quick_e::exp) for single and double precision floating point numbers, Intel intrinsics representing packets of 2, 4, 8 and 16 such numbers, and ARM NEON intrinsics representing 2 doubles or 4 floats. The algorithm has been taken from Agner Fog's Vector Class Library. It is designed to be used in other libraries that make use of Intel or ARM intrinsics. 
Since such libraries often define their own classes for representing vectors of numbers, this file does not define any such classes itself. Also in the namespace quick_e, this file defines the following inline functions that work on intrinsics of type "Vec" and the corresponding scalar type "Sca": Vec add(Vec x, Vec y) Add the elements of x and y Vec sub(Vec x, Vec y) Subtract the elements of x and y Vec mul(Vec x, Vec y) Multiply the elements of x and y Vec div(Vec x, Vec y) Divide the elements of x and y Vec set0() Returns zero in all elements Vec set1(Sca a) Returns all elements set to a Vec sqrt(Vec x) Square root of all elements Vec fmin(Vec x, Vec y) Minimum of elements of x and y Vec fmax(Vec x, Vec y) Maximum of elements of x and y Vec load(const Sca* d) Aligned load from memory location d Vec loadu(const Sca* d) Unaligned load from memory location d void store(Sca* d, Vec x) Aligned store of x to d void storeu(Sca* d, Vec x) Unaligned store of x to d Sca hsum(Vec x) Horizontal sum of elements of x Sca hmul(Vec x) Horizontal product of elements of x Sca hmin(Vec x) Horizontal minimum of elements of x Sca hmax(Vec x) Horizontal maximum of elements of x Vec fma(Vec x, Vec y, Vec z) Fused multiply-add: (x*y)+z Vec fnma(Vec x, Vec y, Vec z) Returns z-(x*y) Vec pow2n(Vec x) Returns 2 to the power of x Vec exp(Vec x) Returns exponential of x */ #ifndef QuickE_H #define QuickE_H 1 #include // Microsoft compiler doesn't define __SSE2__ even if __AVX__ is // defined #ifdef __AVX__ #ifndef __SSE2__ #define __SSE2__ 1 #endif #endif // Headers needed for x86 vector intrinsics #ifdef __SSE2__ #include // SSE #include // SSE2 // Numerous platforms don't define _mm_undefined_ps in xmmintrin.h, // so we assume none do, except GCC >= 4.9.1 and CLANG >= 3.8.0. // Those that don't use an equivalent function that sets the // elements to zero. 
#define QE_MM_UNDEFINED_PS _mm_setzero_ps #ifdef __clang__ #if __has_builtin(__builtin_ia32_undef128) #undef QE_MM_UNDEFINED_PS #define QE_MM_UNDEFINED_PS _mm_undefined_ps #endif #elif defined(__GNUC__) #define GCC_VERSION (__GNUC__ * 10000 \ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) #if GCC_VERSION >= 40901 #undef QE_MM_UNDEFINED_PS #define QE_MM_UNDEFINED_PS _mm_undefined_ps #endif #undef GCC_VERSION #endif // __clang__/__GNUC__ #endif // __SSE2__ #ifdef __SSE4_1__ #include #endif #ifdef __AVX__ #include // SSE3 #include // AVX #endif #ifdef __AVX512F__ #include #endif #ifdef __ARM_NEON // We only have sufficient floating-point intrinsics to vectorize on // 64-bit ARM targets #if defined(__aarch64__) || defined(_M_ARM64) #define QE_HAVE_ARM64_NEON 1 #include "arm_neon.h" #endif #endif namespace quick_e { // ------------------------------------------------------------------- // Traits // ------------------------------------------------------------------- template struct packet { static const bool is_available = false; static const int size = 1; typedef Type type; }; template struct longest_packet { typedef Type type; static const int size = 1; }; // g++ issues ugly warnings if VEC is an Intel intrinsic, disabled // with -Wno-ignored-attributes #define QE_DEFINE_TRAITS(TYPE, SIZE, VEC, HALF_TYPE) \ template <> struct packet { \ static const bool is_available = true; \ static const int size = SIZE; \ typedef VEC type; \ typedef HALF_TYPE half_type; \ }; #define QE_DEFINE_LONGEST(VECS, VECD) \ template <> struct longest_packet { \ typedef VECS type; \ static const int size = sizeof(VECS)/sizeof(float); \ }; \ template <> struct longest_packet { \ typedef VECD type; \ static const int size = sizeof(VECD)/sizeof(double);\ }; #ifdef __SSE2__ #define QE_HAVE_FAST_EXP 1 QE_DEFINE_TRAITS(float, 4, __m128, __m128) QE_DEFINE_TRAITS(double, 2, __m128d, double) #ifdef __AVX__ QE_DEFINE_TRAITS(float, 8, __m256, __m128) QE_DEFINE_TRAITS(double, 4, __m256d, __m128d) #ifdef 
__AVX512F__ QE_DEFINE_TRAITS(float, 16, __m512, __m256) QE_DEFINE_TRAITS(double, 8, __m512d, __m256d) QE_DEFINE_LONGEST(__m512, __m512d) #define QE_LONGEST_FLOAT_PACKET 16 #define QE_LONGEST_DOUBLE_PACKET 8 #else QE_DEFINE_LONGEST(__m256, __m256d) #define QE_LONGEST_FLOAT_PACKET 8 #define QE_LONGEST_DOUBLE_PACKET 4 #endif #else QE_DEFINE_LONGEST(__m128, __m128d) #define QE_LONGEST_FLOAT_PACKET 4 #define QE_LONGEST_DOUBLE_PACKET 2 #endif // If QE_AVAILABLE is defined then we can use the fast exponential #define QE_AVAILABLE #elif defined(QE_HAVE_ARM64_NEON) #define QE_HAVE_FAST_EXP 1 QE_DEFINE_TRAITS(float, 4, float32x4_t, float32x4_t) QE_DEFINE_TRAITS(double, 2, float64x2_t, double) QE_DEFINE_LONGEST(float32x4_t, float64x2_t) #define QE_LONGEST_FLOAT_PACKET 4 #define QE_LONGEST_DOUBLE_PACKET 2 #else // No vectorization available: longest packet is of size 1 QE_DEFINE_LONGEST(float, double); #define QE_LONGEST_FLOAT_PACKET 1 #define QE_LONGEST_DOUBLE_PACKET 1 #endif // ------------------------------------------------------------------- // Scalars // ------------------------------------------------------------------- // Define a few functions for scalars in order that the same // implementation of "exp" can be used for both scalars and SIMD // vectors template T add(T x, T y) { return x+y; } template T sub(T x, T y) { return x-y; } template T mul(T x, T y) { return x*y; } template T div(T x, T y) { return x/y; } template T neg(T x) { return -x; } template void store(T* d, V x) { *d = x; } template void storeu(T* d, V x){ *d = x; } template V load(const T* d) { return *d; } template V loadu(const T* d){ return *d; } template V set1(T x) { return x; } template inline V set0() { return 0.0; }; template T sqrt(T x) { return std::sqrt(x); } template T hsum(T x) { return x; } template T hmul(T x) { return x; } template T hmin(T x) { return x; } template T hmax(T x) { return x; } template T fma(T x, T y, T z) { return (x*y)+z; } template T fnma(T x, T y, T z) { return 
z-(x*y); } template T fmin(T x, T y) { return std::min(x,y); } template T fmax(T x, T y) { return std::max(x,y); } #if __cplusplus > 199711L template <> inline float fmin(float x, float y) { return std::fmin(x,y); } template <> inline double fmin(double x, double y) { return std::fmin(x,y); } template <> inline float fmax(float x, float y) { return std::fmax(x,y); } template <> inline double fmax(double x, double y) { return std::fmax(x,y); } #endif inline float select_gt(float x1, float x2, float y1, float y2) { if (x1 > x2) { return y1; } else { return y2; } } inline double select_gt(double x1, double x2, double y1, double y2) { if (x1 > x2) { return y1; } else { return y2; } } inline bool all_in_range(float x, float low_bound, float high_bound) { return x >= low_bound && x <= high_bound; } inline bool all_in_range(double x, double low_bound, double high_bound) { return x >= low_bound && x <= high_bound; } // ------------------------------------------------------------------- // Macros to define mathematical operations // ------------------------------------------------------------------- // Basic load store, arithmetic, sqrt, min and max #define QE_DEFINE_BASIC(TYPE, VEC, LOAD, LOADU, SET0, SET1, \ STORE, STOREU, ADD, SUB, MUL, DIV, \ SQRT, FMIN, FMAX) \ inline VEC add(VEC x, VEC y) { return ADD(x, y); } \ inline VEC sub(VEC x, VEC y) { return SUB(x, y); } \ inline VEC mul(VEC x, VEC y) { return MUL(x, y); } \ inline VEC div(VEC x, VEC y) { return DIV(x, y); } \ inline VEC neg(VEC x) { return SUB(SET0(), x); } \ template <> inline VEC set0() { return SET0(); } \ template <> inline VEC set1(TYPE x) { return SET1(x); } \ inline VEC sqrt(VEC x) { return SQRT(x); } \ inline VEC fmin(VEC x, VEC y) { return FMIN(x,y); } \ inline VEC fmax(VEC x, VEC y) { return FMAX(x,y); } \ template <> inline VEC load(const TYPE* d) \ { return LOAD(d); } \ template <> inline VEC loadu(const TYPE* d) \ { return LOADU(d); } \ inline void store(TYPE* d, VEC x) { STORE(d, x); } \ inline 
void storeu(TYPE* d, VEC x) { STOREU(d, x); } \ inline std::ostream& operator<<(std::ostream& os, VEC x) { \ static const int size = sizeof(VEC)/sizeof(TYPE); \ union { VEC v; TYPE d[size]; }; \ v = x; os << "{"; \ for (int i = 0; i < size; ++i) \ { os << " " << d[i]; } \ os << "}"; return os; \ } #define QE_DEFINE_CHOP(VEC, HALF_TYPE, LOW, HIGH, PACK) \ inline HALF_TYPE low(VEC x) { return LOW; } \ inline HALF_TYPE high(VEC x) { return HIGH; } \ inline VEC pack(HALF_TYPE x, HALF_TYPE y) { return PACK; } // Reduction operations: horizontal sum, product, min and max #define QE_DEFINE_HORIZ(TYPE, VEC, HSUM, HMUL, HMIN, HMAX) \ inline TYPE hsum(VEC x) { return HSUM(x); } \ inline TYPE hmul(VEC x) { return HMUL(x); } \ inline TYPE hmin(VEC x) { return HMIN(x); } \ inline TYPE hmax(VEC x) { return HMAX(x); } // Define fused multiply-add functions #define QE_DEFINE_FMA(TYPE, VEC, FMA, FNMA) \ inline VEC fma(VEC x,VEC y,VEC z) { return FMA(x,y,z); } \ inline VEC fma(VEC x,TYPE y,VEC z) \ { return FMA(x,set1(y),z); } \ inline VEC fma(TYPE x, VEC y, TYPE z) \ { return FMA(set1(x),y,set1(z)); } \ inline VEC fma(VEC x, VEC y, TYPE z) \ { return FMA(x,y,set1(z)); } \ inline VEC fnma(VEC x,VEC y,VEC z) { return FNMA(x,y,z);} // Alternative order of arguments for ARM NEON #define QE_DEFINE_FMA_ALT(TYPE, VEC, FMA, FNMA) \ inline VEC fma(VEC x,VEC y,VEC z) { return FMA(z,x,y); } \ inline VEC fma(VEC x,TYPE y,VEC z) \ { return FMA(z,x,set1(y)); } \ inline VEC fma(TYPE x, VEC y, TYPE z) \ { return FMA(set1(z),set1(x),y); } \ inline VEC fma(VEC x, VEC y, TYPE z) \ { return FMA(set1(z),x,y); } \ inline VEC fnma(VEC x,VEC y,VEC z) { return FNMA(z,x,y);} // Emulate fused multiply-add if instruction not available #define QE_EMULATE_FMA(TYPE, VEC) \ inline VEC fma(VEC x,VEC y,VEC z) { return add(mul(x,y),z);} \ inline VEC fma(VEC x,TYPE y,VEC z) \ { return add(mul(x,set1(y)),z); } \ inline VEC fma(TYPE x, VEC y, TYPE z) \ { return add(mul(set1(x),y),set1(z)); } \ inline VEC fma(VEC x, VEC 
y, TYPE z) \ { return add(mul(x,y),set1(z)); } \ inline VEC fnma(VEC x,VEC y,VEC z) { return sub(z,mul(x,y));} #define QE_DEFINE_POW2N_S(VEC, VECI, CASTTO, CASTBACK, SHIFTL, \ SETELEM) \ inline VEC pow2n(VEC n) { \ const float pow2_23 = 8388608.0; \ const float bias = 127.0; \ VEC a = add(n, set1(bias+pow2_23)); \ VECI b = CASTTO(a); \ VECI c = SHIFTL(b, SETELEM(23)); \ VEC d = CASTBACK(c); \ return d; \ } #define QE_DEFINE_POW2N_D(VEC, VECI, CASTTO, CASTBACK, SHIFTL, \ SETELEM) \ inline VEC pow2n(VEC n) { \ const double pow2_52 = 4503599627370496.0; \ const double bias = 1023.0; \ VEC a = add(n, set1(bias+pow2_52)); \ VECI b = CASTTO(a); \ VECI c = SHIFTL(b, SETELEM(52)); \ VEC d = CASTBACK(c); \ return d; \ } // ------------------------------------------------------------------- // Define operations for SSE2: vector of 4 floats or 2 doubles // ------------------------------------------------------------------- #ifdef __SSE2__ QE_DEFINE_BASIC(float, __m128, _mm_load_ps, _mm_loadu_ps, _mm_setzero_ps, _mm_set1_ps, _mm_store_ps, _mm_storeu_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps, _mm_div_ps, _mm_sqrt_ps, _mm_min_ps, _mm_max_ps) QE_DEFINE_BASIC(double, __m128d, _mm_load_pd, _mm_loadu_pd, _mm_setzero_pd, _mm_set1_pd, _mm_store_pd, _mm_storeu_pd, _mm_add_pd, _mm_sub_pd, _mm_mul_pd, _mm_div_pd, _mm_sqrt_pd, _mm_min_pd, _mm_max_pd) // Don't define chop operations for __m128 because we don't have a // container for two floats QE_DEFINE_CHOP(__m128d, double, _mm_cvtsd_f64(x), _mm_cvtsd_f64(_mm_unpackhi_pd(x,x)), _mm_set_pd(y,x)) // No built-in horizontal operations for SSE2, so need to implement // by hand #define QE_DEFINE_HORIZ_SSE2(FUNC, OP_PS, OP_SS, OP_PD) \ inline float FUNC(__m128 x) { \ __m128 shuf = _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 3, 0, 1)); \ __m128 sums = OP_PS(x, shuf); \ shuf = _mm_movehl_ps(shuf, sums); \ return _mm_cvtss_f32(OP_SS(sums, shuf)); \ } \ inline double FUNC(__m128d x) { \ __m128 shuftmp= _mm_movehl_ps(QE_MM_UNDEFINED_PS(), \ _mm_castpd_ps(x)); 
\ __m128d shuf = _mm_castps_pd(shuftmp); \ return _mm_cvtsd_f64(OP_PD(x, shuf)); \ } QE_DEFINE_HORIZ_SSE2(hsum, _mm_add_ps, _mm_add_ss, _mm_add_pd) QE_DEFINE_HORIZ_SSE2(hmul, _mm_mul_ps, _mm_mul_ss, _mm_mul_pd) QE_DEFINE_HORIZ_SSE2(hmin, _mm_min_ps, _mm_min_ss, _mm_min_pd) QE_DEFINE_HORIZ_SSE2(hmax, _mm_max_ps, _mm_max_ss, _mm_max_pd) #undef QE_MM_UNDEFINED_PS #undef QE_DEFINE_HORIZ_SSE2 #ifdef __FMA__ QE_DEFINE_FMA(float, __m128, _mm_fmadd_ps, _mm_fnmadd_ps) QE_DEFINE_FMA(double, __m128d, _mm_fmadd_pd, _mm_fnmadd_pd) #else QE_EMULATE_FMA(float, __m128) QE_EMULATE_FMA(double, __m128d) #endif #ifdef __SSE4_1__ inline __m128 unchecked_round(__m128 x) { return _mm_round_ps(x, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); } inline __m128d unchecked_round(__m128d x) { return _mm_round_pd(x, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); } #else // No native function available, but since the arguments are limited // to +/- 700, we don't need to check for going out of bounds inline __m128 unchecked_round(__m128 x) { return _mm_cvtepi32_ps(_mm_cvtps_epi32(x)); } inline __m128d unchecked_round(__m128d x) { return _mm_cvtepi32_pd(_mm_cvtpd_epi32(x)); } #endif inline float unchecked_round(float x) { return _mm_cvtss_f32(unchecked_round(_mm_set_ss(x))); } inline double unchecked_round(double x) { return low(unchecked_round(_mm_set_sd(x))); } QE_DEFINE_POW2N_S(__m128, __m128i, _mm_castps_si128, _mm_castsi128_ps, _mm_sll_epi32, _mm_cvtsi32_si128) QE_DEFINE_POW2N_D(__m128d, __m128i, _mm_castpd_si128, _mm_castsi128_pd, _mm_sll_epi64, _mm_cvtsi32_si128) inline float pow2n(float x) { return _mm_cvtss_f32(pow2n(quick_e::set1<__m128>(x))); } inline double pow2n(double x) { return low(pow2n(quick_e::set1<__m128d>(x))); } inline bool horiz_and(__m128i a) { #ifdef __SSE4_1__ return _mm_testc_si128(a, _mm_set1_epi32(-1)) != 0; #else __m128i t1 = _mm_unpackhi_epi64(a, a); // get 64 bits down __m128i t2 = _mm_and_si128(a, t1); // and 64 bits #ifdef __x86_64__ int64_t t5 = 
_mm_cvtsi128_si64(t2); // transfer 64 bits to integer return t5 == int64_t(-1); #else __m128i t3 = _mm_srli_epi64(t2, 32); // get 32 bits down __m128i t4 = _mm_and_si128(t2, t3); // and 32 bits int t5 = _mm_cvtsi128_si32(t4); // transfer 32 bits to integer return t5 == -1; #endif // __x86_64__ #endif // SSE 4.1 } inline bool all_in_range(__m128 x, float low_bound, float high_bound) { return horiz_and(_mm_castps_si128(_mm_and_ps( _mm_cmpge_ps(x,set1<__m128>(low_bound)), _mm_cmple_ps(x,set1<__m128>(high_bound))))); } inline bool all_in_range(__m128d x, double low_bound, double high_bound) { return horiz_and(_mm_castpd_si128(_mm_and_pd( _mm_cmpge_pd(x,set1<__m128d>(low_bound)), _mm_cmple_pd(x,set1<__m128d>(high_bound))))); } // If x1 > x2, select y1, or select y2 otherwise inline __m128 select_gt(__m128 x1, __m128 x2, __m128 y1, __m128 y2) { __m128 mask = _mm_cmpgt_ps(x1,x2); #ifdef __SSE4_1__ return _mm_blendv_ps(y2, y1, mask); #else return _mm_or_ps(_mm_and_ps(mask, y1), _mm_andnot_ps(mask, y2)); #endif } inline __m128d select_gt(__m128d x1, __m128d x2, __m128d y1, __m128d y2) { __m128d mask = _mm_cmpgt_pd(x1,x2); #ifdef __SSE4_1__ return _mm_blendv_pd(y2, y1, mask); #else return _mm_or_pd(_mm_and_pd(mask, y1), _mm_andnot_pd(mask, y2)); #endif } #endif // ------------------------------------------------------------------- // Define operations for AVX: vector of 8 floats or 4 doubles // ------------------------------------------------------------------- #ifdef __AVX__ QE_DEFINE_BASIC(float, __m256, _mm256_load_ps, _mm256_loadu_ps, _mm256_setzero_ps, _mm256_set1_ps, _mm256_store_ps, _mm256_storeu_ps, _mm256_add_ps, _mm256_sub_ps, _mm256_mul_ps, _mm256_div_ps, _mm256_sqrt_ps, _mm256_min_ps, _mm256_max_ps) QE_DEFINE_BASIC(double, __m256d, _mm256_load_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_set1_pd, _mm256_store_pd, _mm256_storeu_pd, _mm256_add_pd, _mm256_sub_pd, _mm256_mul_pd, _mm256_div_pd, _mm256_sqrt_pd, _mm256_min_pd, _mm256_max_pd) QE_DEFINE_CHOP(__m256, 
__m128, _mm256_castps256_ps128(x), _mm256_extractf128_ps(x,1), _mm256_permute2f128_ps(_mm256_castps128_ps256(x), _mm256_castps128_ps256(y), 0x20)) QE_DEFINE_CHOP(__m256d, __m128d, _mm256_castpd256_pd128(x), _mm256_extractf128_pd(x,1), _mm256_permute2f128_pd(_mm256_castpd128_pd256(x), _mm256_castpd128_pd256(y), 0x20)); // Implement by calling SSE2 h* functions inline float hsum(__m256 x) { return hsum(add(low(x), high(x))); } inline float hmul(__m256 x) { return hmul(mul(low(x), high(x))); } inline float hmin(__m256 x) { return hmin(fmin(low(x), high(x))); } inline float hmax(__m256 x) { return hmax(fmax(low(x), high(x))); } inline double hsum(__m256d x) { return hsum(add(low(x), high(x))); } // Alternative would be to use _mm_hadd_pd inline double hmul(__m256d x) { return hmul(mul(low(x), high(x))); } inline double hmin(__m256d x) { return hmin(fmin(low(x), high(x))); } inline double hmax(__m256d x) { return hmax(fmax(low(x), high(x))); } // Define extras #ifdef __FMA__ QE_DEFINE_FMA(float, __m256, _mm256_fmadd_ps, _mm256_fnmadd_ps) QE_DEFINE_FMA(double, __m256d, _mm256_fmadd_pd, _mm256_fnmadd_pd) #else QE_EMULATE_FMA(float, __m256) QE_EMULATE_FMA(double, __m256d) #endif inline __m256 unchecked_round(__m256 x) { return _mm256_round_ps(x, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); } inline __m256d unchecked_round(__m256d x) { return _mm256_round_pd(x, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); } #ifdef __AVX2__ QE_DEFINE_POW2N_S(__m256, __m256i, _mm256_castps_si256, _mm256_castsi256_ps, _mm256_sll_epi32, _mm_cvtsi32_si128) QE_DEFINE_POW2N_D(__m256d, __m256i, _mm256_castpd_si256, _mm256_castsi256_pd, _mm256_sll_epi64, _mm_cvtsi32_si128) #else // Suboptimized versions call the SSE2 functions on the upper and // lower parts inline __m256 pow2n(__m256 n) { return pack(pow2n(low(n)), pow2n(high(n))); } inline __m256d pow2n(__m256d n) { return pack(pow2n(low(n)), pow2n(high(n))); } #endif // Return true if all elements of x are in the range (inclusive) of // 
low_bound to high_bound. If so the exp call can exit before the // more costly case of working out what to do with inputs out of // bounds. Note that _CMP_GE_OS means compare // greater-than-or-equal-to, ordered, signaling, where "ordered" // means that if either operand is NaN, the result is false. inline bool all_in_range(__m256 x, float low_bound, float high_bound) { return _mm256_testc_si256(_mm256_castps_si256(_mm256_and_ps( _mm256_cmp_ps(x,set1<__m256>(low_bound), _CMP_GE_OS), _mm256_cmp_ps(x,set1<__m256>(high_bound), _CMP_LE_OS))), _mm256_set1_epi32(-1)) != 0; } inline bool all_in_range(__m256d x, double low_bound, double high_bound) { return _mm256_testc_si256(_mm256_castpd_si256(_mm256_and_pd( _mm256_cmp_pd(x,set1<__m256d>(low_bound), _CMP_GE_OS), _mm256_cmp_pd(x,set1<__m256d>(high_bound), _CMP_LE_OS))), _mm256_set1_epi32(-1)) != 0; } inline __m256 select_gt(__m256 x1, __m256 x2, __m256 y1, __m256 y2) { return _mm256_blendv_ps(y2, y1, _mm256_cmp_ps(x1,x2,_CMP_GT_OS)); } inline __m256d select_gt(__m256d x1, __m256d x2, __m256d y1, __m256d y2) { return _mm256_blendv_pd(y2, y1, _mm256_cmp_pd(x1,x2,_CMP_GT_OS)); } #endif // ------------------------------------------------------------------- // Define operations for AVX512: vector of 16 floats or 8 doubles // ------------------------------------------------------------------- #ifdef __AVX512F__ QE_DEFINE_BASIC(float, __m512, _mm512_load_ps, _mm512_loadu_ps, _mm512_setzero_ps, _mm512_set1_ps, _mm512_store_ps, _mm512_storeu_ps, _mm512_add_ps, _mm512_sub_ps, _mm512_mul_ps, _mm512_div_ps, _mm512_sqrt_ps, _mm512_min_ps, _mm512_max_ps) QE_DEFINE_HORIZ(float, __m512, _mm512_reduce_add_ps, _mm512_reduce_mul_ps, _mm512_reduce_min_ps, _mm512_reduce_max_ps) QE_DEFINE_BASIC(double, __m512d, _mm512_load_pd, _mm512_loadu_pd, _mm512_setzero_pd, _mm512_set1_pd, _mm512_store_pd, _mm512_storeu_pd, _mm512_add_pd, _mm512_sub_pd, _mm512_mul_pd, _mm512_div_pd, _mm512_sqrt_pd, _mm512_min_pd, _mm512_max_pd) QE_DEFINE_HORIZ(double, 
__m512d, _mm512_reduce_add_pd, _mm512_reduce_mul_pd, _mm512_reduce_min_pd, _mm512_reduce_max_pd) inline __m512 unchecked_round(__m512 x) { return _mm512_roundscale_ps(x, 0); } inline __m512d unchecked_round(__m512d x) { return _mm512_roundscale_pd(x, 0); } QE_DEFINE_FMA(float, __m512, _mm512_fmadd_ps, _mm512_fnmadd_ps) QE_DEFINE_FMA(double, __m512d, _mm512_fmadd_pd, _mm512_fnmadd_pd) QE_DEFINE_POW2N_S(__m512, __m512i, _mm512_castps_si512, _mm512_castsi512_ps, _mm512_sll_epi32, _mm_cvtsi32_si128) QE_DEFINE_POW2N_D(__m512d, __m512i, _mm512_castpd_si512, _mm512_castsi512_pd, _mm512_sll_epi64, _mm_cvtsi32_si128) inline bool all_in_range(__m512 x, float low_bound, float high_bound) { return static_cast(_mm512_kand( _mm512_cmp_ps_mask(x,set1<__m512>(low_bound),_CMP_GE_OS), _mm512_cmp_ps_mask(x,set1<__m512>(high_bound),_CMP_LE_OS))) == static_cast(65535); } inline bool all_in_range(__m512d x, double low_bound, double high_bound) { return static_cast(_mm512_kand( _mm512_cmp_pd_mask(x,set1<__m512d>(low_bound),_CMP_GE_OS), _mm512_cmp_pd_mask(x,set1<__m512d>(high_bound),_CMP_LE_OS))) == static_cast(255); } inline __m512 select_gt(__m512 x1, __m512 x2, __m512 y1, __m512 y2) { return _mm512_mask_mov_ps(y2, _mm512_cmp_ps_mask(x1,x2,_CMP_GT_OS), y1); } inline __m512d select_gt(__m512d x1, __m512d x2, __m512d y1, __m512d y2) { return _mm512_mask_mov_pd(y2, _mm512_cmp_pd_mask(x1,x2,_CMP_GT_OS), y1); } #endif #ifdef QE_HAVE_ARM64_NEON // Implement ARM version of x86 setzero inline float32x4_t vzeroq_f32() { return vdupq_n_f32(0.0); } inline float64x2_t vzeroq_f64() { return vdupq_n_f64(0.0); } // Horizontal multiply across vector inline float vmulvq_f32(float32x4_t x) { union { float32x2_t v; float data[2]; }; v = vmul_f32(vget_low_f32(x), vget_high_f32(x)); return data[0] * data[1]; } inline double vmulvq_f64(float64x2_t x) { union { float64x2_t v; double data[2]; }; v = x; return data[0] * data[1]; } QE_DEFINE_BASIC(float, float32x4_t, vld1q_f32, vld1q_f32, vzeroq_f32, 
vdupq_n_f32, vst1q_f32, vst1q_f32, vaddq_f32, vsubq_f32, vmulq_f32, vdivq_f32, vsqrtq_f32, vminq_f32, vmaxq_f32) QE_DEFINE_HORIZ(float, float32x4_t, vaddvq_f32, vmulvq_f32, vminvq_f32, vmaxvq_f32) /* As for the float case, the same vld1q/vst1q intrinsic is passed for both the aligned and the unaligned load/store slots. */ QE_DEFINE_BASIC(double, float64x2_t, vld1q_f64, vld1q_f64, vzeroq_f64, vdupq_n_f64, vst1q_f64, vst1q_f64, vaddq_f64, vsubq_f64, vmulq_f64, vdivq_f64, vsqrtq_f64, vminq_f64, vmaxq_f64) QE_DEFINE_HORIZ(double, float64x2_t, vaddvq_f64, vmulvq_f64, vminvq_f64, vmaxvq_f64) QE_DEFINE_POW2N_S(float32x4_t, int32x4_t, vreinterpretq_s32_f32, vreinterpretq_f32_s32, vshlq_s32, vdupq_n_s32) QE_DEFINE_POW2N_D(float64x2_t, int64x2_t, vreinterpretq_s64_f64, vreinterpretq_f64_s64, vshlq_s64, vdupq_n_s64) /* The _ALT variant passes the addend z as the first intrinsic argument. */ QE_DEFINE_FMA_ALT(float, float32x4_t, vfmaq_f32, vfmsq_f32) QE_DEFINE_FMA_ALT(double, float64x2_t, vfmaq_f64, vfmsq_f64) /* Range test for four floats: the >= low_bound and <= high_bound masks are ANDed, the upper and lower halves of the result are folded together, and both remaining lanes must be non-zero. NOTE(review): the bounds are declared double here although the x86 float overloads take float; vdupq_n_f32 converts them back to float. */ inline bool all_in_range(float32x4_t x, double low_bound, double high_bound) { union { uint32x2_t v; uint32_t data[2]; }; uint32x4_t tmp = vandq_u32(vcgeq_f32(x,vdupq_n_f32(low_bound)), vcleq_f32(x,vdupq_n_f32(high_bound))); v = vand_u32(vget_low_u32(tmp), vget_high_u32(tmp)); return data[0] && data[1]; } /* Range test for two doubles: both lanes of the combined mask must be non-zero. */ inline bool all_in_range(float64x2_t x, double low_bound, double high_bound) { union { uint64x2_t v; uint64_t data[2]; }; v = vandq_u64(vcgeq_f64(x,vdupq_n_f64(low_bound)), vcleq_f64(x,vdupq_n_f64(high_bound))); return data[0] && data[1]; } /* Round by converting to a signed integer and back. NOTE(review): the vcvta* conversions round halfway cases away from zero -- confirm that differing from the rounding used on x86 is acceptable here. */ inline float32x4_t unchecked_round(float32x4_t x) { return vcvtq_f32_s32(vcvtaq_s32_f32(x)); } inline float64x2_t unchecked_round(float64x2_t x) { return vcvtq_f64_s64(vcvtaq_s64_f64(x)); } /* Per lane: y1 where x1 > x2, otherwise y2. */ inline float32x4_t select_gt(float32x4_t x1, float32x4_t x2, float32x4_t y1, float32x4_t y2) { return vbslq_f32(vcgtq_f32(x1,x2), y1, y2); } inline float64x2_t select_gt(float64x2_t x1, float64x2_t x2, float64x2_t y1, float64x2_t y2) { return vbslq_f64(vcgtq_f64(x1,x2), y1, y2); } /* Scalar versions reuse the vector routine: broadcast x to every lane and read back lane 0. */ inline float unchecked_round(float x) { return vgetq_lane_f32(unchecked_round(vdupq_n_f32(x)), 0); } inline double unchecked_round(double x) { return
vgetq_lane_f64(unchecked_round(vdupq_n_f64(x)), 0); } inline float pow2n(float x) { return vgetq_lane_f32(pow2n(vdupq_n_f32(x)),0); } inline double pow2n(double x) { return vgetq_lane_f64(pow2n(vdupq_n_f64(x)),0); } #endif #ifdef QE_HAVE_FAST_EXP // ------------------------------------------------------------------- // Implementation of fast exponential // ------------------------------------------------------------------- template static inline Vec polynomial_5(Vec const x, Type c0, Type c1, Type c2, Type c3, Type c4, Type c5) { // calculates polynomial c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0 using quick_e::fma; Vec x2 = mul(x, x); Vec x4 = mul(x2, x2); return fma(fma(c3, x, c2), x2, fma(fma(c5, x, c4), x4, fma(c1, x, c0))); } /* Single-precision exponential. r is initial_x/ln(2) rounded to an integer; the remainder x = initial_x - r*ln(2) is formed in two steps (ln2f_hi, then ln2f_lo); z = x + x^2 * polynomial_5(x) approximates exp(x) - 1; the result is z*2^r + 2^r with 2^r supplied by pow2n. Unless __FAST_MATH__ is defined, lanes below min_x are replaced by 0 and lanes above max_x by +Inf. */ template inline Vec fastexp_float(Vec const initial_x) { using namespace quick_e; using quick_e::unchecked_round; using quick_e::fma; // Taylor coefficients const float P0expf = 1.f/2.f; const float P1expf = 1.f/6.f; const float P2expf = 1.f/24.f; const float P3expf = 1.f/120.f; const float P4expf = 1.f/720.f; const float P5expf = 1.f/5040.f; const float VM_LOG2E = 1.44269504088896340736; // 1/log(2) const float ln2f_hi = 0.693359375f; const float ln2f_lo = -2.12194440e-4f; #ifndef __FAST_MATH__ const float min_x = -87.3f; const float max_x = +89.0f; #endif Vec r = unchecked_round(mul(initial_x,set1(VM_LOG2E))); Vec x = fnma(r, set1(ln2f_hi), initial_x); // x -= r * ln2f_hi; x = fnma(r, set1(ln2f_lo), x); // x -= r * ln2f_lo; Vec z = polynomial_5(x,P0expf,P1expf,P2expf,P3expf,P4expf,P5expf); Vec x2 = mul(x, x); z = fma(z, x2, x); // z *= x2; z += x; // multiply by power of 2 Vec n2 = pow2n(r); z = fma(z,n2,n2); #ifdef __FAST_MATH__ return z; #else if (all_in_range(initial_x, min_x, max_x)) { return z; } else { // When initial_x<-87.3, set exp(x) to 0.0 z = select_gt(set1(min_x), initial_x, set0(), z); // When initial_x>+89.0, set exp(x) to +Inf z = select_gt(initial_x, set1(max_x), set1(std::numeric_limits::infinity()), z); return z;
} #endif } template Vec polynomial_13m(Vec const x, Type c2, Type c3, Type c4, Type c5, Type c6, Type c7, Type c8, Type c9, Type c10, Type c11, Type c12, Type c13) { // calculates polynomial c13*x^13 + c12*x^12 + ... + x + 0 using quick_e::fma; Vec x2 = mul(x, x); Vec x4 = mul(x2, x2); // Vec x8 = mul(x4, x4); return fma(fma(fma(c13, x, c12), x4, fma(fma(c11, x, c10), x2, fma(c9, x, c8))), mul(x4, x4), fma(fma(fma(c7, x, c6), x2, fma(c5, x, c4)), x4, fma(fma(c3, x, c2), x2, x))); //return fma(fma(fma(fma(fma(fma(fma(fma(fma(fma(fma(fma(c13, x, c12), x, c11), x, c10), x, c9), x, c8), x, c7), x, c6), x, c5), x, c4), x, c3), x, c2), mul(x,x), x); } // Template function implementing the fast exponential, where Vec // can be double, __m128d, __m256d or __m512d template inline Vec fastexp_double(Vec const initial_x) { using namespace quick_e; using quick_e::unchecked_round; using quick_e::fma; const double p2 = 1./2.; const double p3 = 1./6.; const double p4 = 1./24.; const double p5 = 1./120.; const double p6 = 1./720.; const double p7 = 1./5040.; const double p8 = 1./40320.; const double p9 = 1./362880.; const double p10 = 1./3628800.; const double p11 = 1./39916800.; const double p12 = 1./479001600.; const double p13 = 1./6227020800.; const double VM_LOG2E = 1.44269504088896340736; // 1/log(2) const double ln2d_hi = 0.693145751953125; const double ln2d_lo = 1.42860682030941723212E-6; #ifndef __FAST_MATH__ const double min_x = -708.39; const double max_x = +709.70; #endif Vec r = unchecked_round(mul(initial_x,set1(VM_LOG2E))); // subtraction in two steps for higher precision Vec x = fnma(r, set1(ln2d_hi), initial_x); // x -= r * ln2d_hi; x = fnma(r, set1(ln2d_lo), x); // x -= r * ln2d_lo; // multiply by power of 2 Vec n2 = pow2n(r); Vec z = polynomial_13m(x, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); z = fma(z,n2,n2); #ifdef __FAST_MATH__ return z; #else if (all_in_range(initial_x, min_x, max_x)) { // Fast normal path return z; } else { // When 
initial_x<-708.39, set exp(x) to 0.0 z = select_gt(set1(min_x), initial_x, set0(), z); // When initial_x>+709.70.0, set exp(x) to +Inf z = select_gt(initial_x, set1(max_x), set1(std::numeric_limits::infinity()), z); return z; } #endif } #endif // Define the various overloads for the quick_e::exp function taking // Intel intrinsics as an argument #ifdef __SSE2__ inline __m128 exp(__m128 x) { return fastexp_float(x); } inline __m128d exp(__m128d x) { return fastexp_double(x); } #endif #ifdef __AVX__ inline __m256 exp(__m256 x) { return fastexp_float(x); } inline __m256d exp(__m256d x) { return fastexp_double(x); } #endif #ifdef __AVX512F__ inline __m512 exp(__m512 x) { return fastexp_float(x); } inline __m512d exp(__m512d x) { return fastexp_double(x); } #endif #ifdef QE_HAVE_ARM64_NEON inline float32x4_t exp(float32x4_t x) { return fastexp_float(x); } inline float64x2_t exp(float64x2_t x) { return fastexp_double(x); } #endif // Define the quick_e::exp function for scalar arguments #ifdef QE_HAVE_FAST_EXP inline float exp(float x) { return quick_e::fastexp_float(x); } inline double exp(double x) { return quick_e::fastexp_double(x); } #else // If no vectorization available then we fall back to the standard // library scalar version inline float exp(float x) { return std::exp(x); } inline double exp(double x) { return std::exp(x); } #endif #undef QE_DEFINE_TRAITS #undef QE_DEFINE_LONGEST #undef QE_DEFINE_BASIC #undef QE_DEFINE_CHOP #undef QE_DEFINE_HORIZ #undef QE_DEFINE_FMA #undef QE_DEFINE_FMA_ALT #undef QE_EMULATE_FMA #undef QE_DEFINE_POW2N_S #undef QE_DEFINE_POW2N_D #undef QE_HAVE_FAST_EXP #undef QE_HAVE_ARM64_NEON } #endif ================================================ FILE: include/adept/reduce.h ================================================ /* reduce.h -- "Reduce" functions such as find, all, sum etc. Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
This file implements a number of array functions whose return values are reduced in either rank or size compared to their arguments. The first is the "find" function that takes a rank-1 bool Expression, and returns an IntVector of indices to the "true" values. This is modelled on Matlab's "find" function. A number of further reduce functions are implemented using the same calling style as the equivalent Fortran-90 functions. They fall into two types: 1. sum, mean, product, minval, maxval, norm2 2. all, any The first take active or inactive Expression arguments of real or (sometimes) integer type, while the second only take inactive Expressions of bool type. If called with one Expression argument of any rank, a single value is returned containing the result of the reduce operation on all the elements of the Expression. If a second integer argument is provided then the operation is carried out along that dimension and an Expression of rank one less than the first argument is returned. These functions are implemented by delegating to a generic "Reduce" function that uses policy classes to implement the elemental operations. */ #ifndef AdeptReduce_H #define AdeptReduce_H #include #include #include #include #include #include namespace adept { // ------------------------------------------------------------------- // Section 1. "find" // ------------------------------------------------------------------- // This function takes a rank-1 bool Expression, and returns an // IntVector of indices to the "true" values. 
template inline typename internal::enable_if::type find(const Expression& rhs) { ExpressionSize<1> length; // Check the argument of the function is a valid expression if (!rhs.get_dimensions(length)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } // Length of the rank-1 expression Index& len = length[0]; // Allocate a return vector of the same length as the expression // in case all values are true IntVector ans(len); // Keep track of the actual number of true values Index true_len = 0; // Get location of first value in expression ExpressionSize<1> coords(0); ExpressionSize loc; rhs.set_location(coords, loc); // Loop over all values in the expression for (int i = 0; i < len; i++) { if (rhs.next_value(loc)) { ans(true_len++) = i; } } if (true_len == 0) { // No values are "true": return an empty vector return IntVector(); } else if (true_len < len) { // Some values are "true": return the part of the "ans" vector // that contains indices to these values. Note that the // following subsetting operation links to the original data // rather than copying it. return ans(range(0,true_len-1)); } else { // All values are "true": return the entire vector. return ans; } } namespace internal { // For minval and maxval to work we need starting values for the accumulation template struct numeric_limits { }; template struct numeric_limits::has_infinity>::type> { static T min_inf() { return std::numeric_limits::min(); } static T max_inf() { return std::numeric_limits::max(); } }; template struct numeric_limits::has_infinity>::type> { static T min_inf() { return -std::numeric_limits::infinity(); } static T max_inf() { return std::numeric_limits::infinity(); } }; // ------------------------------------------------------------------- // Section 2. 
Policy classes to enable the generic "reduce" function // ------------------------------------------------------------------- // Sum enables the "sum" function that sums its arguments. template struct Sum { // What is the type of the running total? typedef T total_type; // Number of extra operations per element, needed for reserving // space in active calculations static const int extra_element_cost = 0; // Do we need to do anything to the final summed value(s)? static const bool finish_needed = false; // Do we need to do anything to the final summed value(s) in the // case that we are doing automatic differentiation? static const bool active_finish_needed = true; // Used by "expression_string()" const char* name() { return "sum"; } // Start the accumulation with zero T first_value() { return 0; } // Accumulation consists of incrementing "total" by the value on // the right hand side; note that the arguments are either of // type T or type Packet template void accumulate(E& total, const E& rhs) { total += rhs; } // When the reduce operation is vectorized, packets of data are // accumulated, requiring the ability to horizontally accumulate // each element of the packet, but only the packet2 version is // needed (the original accumulate_packet had problems with the // norm2 function, and is no longer used - can be removed) //T accumulate_packet(const Packet& ptotal) { // return hsum(ptotal); //} template void accumulate_packet2(E& total, const Packet& ptotal) { total += hsum(ptotal); } // In the case of active arguments, the next_value_and_gradient // function pushes the right hand side onto the operation stack, // but does not push the "total" object onto the statement // stack. This is done right at the end of the summation // operations. 
template void accumulate_active(Active& total, const E& rhs, ExpressionSize& loc) { total.lvalue() += rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc); } // No need to do anything to the final value template void finish(X& total, const Index& n) { } // In the active case, the final action is to complete the // storage of the differential statement by pushing the left // hand side onto the statement stack. void finish_active(Active& total, const Index& n) { ADEPT_ACTIVE_STACK->push_lhs(total.gradient_index()); } }; // Mean enables the "mean" function - the same as "sum" but // dividing the final result by the number of elements averaged. template struct Mean { typedef T total_type; static const int extra_element_cost = 0; static const bool finish_needed = true; static const bool active_finish_needed = true; const char* name() { return "mean"; } T first_value() { return 0; } template void accumulate(E& total, const E& rhs) { total += rhs; } //T accumulate_packet(const Packet& ptotal) { // return hsum(ptotal); //} template void accumulate_packet2(E& total, const Packet& ptotal) { total += hsum(ptotal); } template void accumulate_active(Active& total, const E& rhs, ExpressionSize& loc) { total.lvalue() += rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc); } template // Divide by the total number of elements void finish(X& total, const Index& n) { total /= n; } void finish_active(Active& total, const Index& n) { ADEPT_ACTIVE_STACK->push_lhs(total.gradient_index()); total /= n; } }; // Product enables the "product" function that multiplies all its // arguments together. 
template struct Product { typedef T total_type; static const int extra_element_cost = 1; static const bool finish_needed = false; static const bool active_finish_needed = false; const char* name() { return "product"; } T first_value() { return 1; } template void accumulate(E& total, const E& rhs) { total *= rhs; } //T accumulate_packet(const Packet& ptotal) { // return hprod(ptotal); //} template void accumulate_packet2(E& total, const Packet& ptotal) { total *= hprod(ptotal); } template void accumulate_active(Active& total, const E& rhs, ExpressionSize& loc) { // Differentiate t = t*x -> dt = t*dx + x*dt. First compute // x, while passing t as the last argument so that t*dx is put // on the operation stack. T xval = rhs.next_value_and_gradient_special(*ADEPT_ACTIVE_STACK, loc, total.value()); // Now treat x as inactive and Active will do the rest total *= xval; } template void finish(X& total, const Index& n) { } void finish_active(Active& total, const Index& n) { } }; // MaxVal enables the "maxval" function that returns the maximum value template struct MaxVal { typedef T total_type; static const int extra_element_cost = 0; static const bool finish_needed = false; static const bool active_finish_needed = false; const char* name() { return "maxval"; } // Initiate the total with the minimum possible value T first_value() { return internal::numeric_limits::min_inf(); } #ifdef ADEPT_CXX11_FEATURES void accumulate(T& total, const T& rhs) { using std::fmax; total = fmax(total,rhs); } template void accumulate_packet2(E& total, const Packet& ptotal) { using std::fmax; total = fmax(total,hmax(ptotal)); } #else void accumulate(T& total, const T& rhs) { using std::max; total = max(total,rhs); } template void accumulate_packet2(E& total, const Packet& ptotal) { using std::max; total = max(total,hmax(ptotal)); } #endif void accumulate(Packet& total, const Packet& rhs) { total = fmax(total,rhs); } //T accumulate_packet(const Packet& ptotal) { // return hmax(ptotal); //} 
template void accumulate_active(Active& total, const E& rhs, ExpressionSize& loc) { // The following is not optimal since if a maximum is found // then the value is evaluated twice. Better would be to // locate the maximum in the entire array, then do the active // stuff just for that element. if (rhs.value_at_location(loc) > total.value()) { // The right hand side puts itself on the operation stack, // while operator= puts the left hand side on the statement // stack. total = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc); } else { rhs.advance_location(loc); } } template void finish(X& total, const Index& n) { } void finish_active(Active& total, const Index& n) { } }; // MinVal enables the "minval" function that returns the minimum value template struct MinVal { typedef T total_type; static const int extra_element_cost = 0; static const bool finish_needed = false; static const bool active_finish_needed = false; const char* name() { return "minval"; } T first_value() { return internal::numeric_limits::max_inf(); } #ifdef ADEPT_CXX11_FEATURES void accumulate(T& total, const T& rhs) { using std::fmin; total = fmin(total,rhs); } void accumulate_packet2(T& total, const Packet& ptotal) { using std::fmin; total = fmin(total,hmin(ptotal)); } #else void accumulate(T& total, const T& rhs) { using std::min; total = min(total,rhs); } void accumulate_packet2(T& total, const Packet& ptotal) { using std::min; total = min(total,hmin(ptotal)); } #endif void accumulate(Packet& total, const Packet& rhs) { total = fmin(total,rhs); } //T accumulate_packet(const Packet& ptotal) { // return hmin(ptotal); //} template void accumulate_active(Active& total, const E& rhs, ExpressionSize& loc) { // The following is not optimal since if a maximum is found // then the value is evaluated twice if (rhs.value_at_location(loc) < total.value()) { // The right hand side puts itself on the operation stack, // while operator= puts the left hand side on the statement // stack. 
total = rhs.next_value_and_gradient(*ADEPT_ACTIVE_STACK, loc); } else { rhs.advance_location(loc); } } template void finish(X& total, const Index& n) { } void finish_active(Active& total, const Index& n) { } }; // Norm2 enables the "norm2" function that returns the L-2 norm of // its arguments, equal to sqrt(sum(rhs*rhs)) template struct Norm2 { typedef T total_type; static const int extra_element_cost = 0; static const bool finish_needed = true; static const bool active_finish_needed = true; const char* name() { return "norm2"; } T first_value() { return 0; } template void accumulate(E& total, const E& rhs) { total += rhs*rhs; } //T accumulate_packet(const Packet& ptotal) { // return hsum(ptotal); //} // Note that ptotal is already an accumulation of squared // values, so does not need to be squared again template void accumulate_packet2(E& total, const Packet& ptotal) { total += hsum(ptotal); } template void accumulate_active(Active& total, const E& rhs, ExpressionSize& loc) { // Differentiate t += x*x -> dt += 2*x*dx. Use the "special2" // version of the following function, where multiplier*x*dx is // put on the operation stack. T xval = rhs.next_value_and_gradient_special2(*ADEPT_ACTIVE_STACK, loc, 2.0); // Now do a purely inactive operation since we will put // "total" on the statement stack only right at the end total.lvalue() += xval*xval; } template void finish(X& total, const Index& n) { using std::sqrt; total = noalias(sqrt(total)); } void finish_active(Active& total, const Index& n) { using std::sqrt; // The operation stack now contains the derivatives of all the // squared elements on the right hand side. Here we complete // the differential statement by pushing the left hand side // onto the statement stack. ADEPT_ACTIVE_STACK->push_lhs(total.gradient_index()); // Since total is active it will do the right thing in the // final operation. 
total = noalias(sqrt(total)); } }; // All enables the "all" function that returns "true" only if all // the bool elements of the right hand side are true. It would be // faster if it could quit after finding the first "false". struct All { typedef bool total_type; static const bool finish_needed = false; const char* name() { return "all"; } bool first_value() { return true; } void accumulate(bool& total, const bool& rhs) { total = total && rhs; } template void finish(X& total, const Index& n) { } }; // Any enables the "any" function that returns "true" if any of // the bool elements of the right hand side are true. It would be // faster if it could quite after finding the first "true". struct Any { typedef bool total_type; static const bool finish_needed = false; const char* name() { return "any"; } bool first_value() { return false; } void accumulate(bool& total, const bool& rhs) { total = total || rhs; } template void finish(X& total, const Index& n) { } }; // Count enables the "count" function that returns the number of // "true" elements in a bool array. struct Count { typedef Index total_type; static const bool finish_needed = false; const char* name() { return "count"; } Index first_value() { return 0; } void accumulate(Index& total, const bool& rhs) { total += static_cast(rhs); } // true=1, false=0 template void finish(X& total, const Index& n) { } }; // ------------------------------------------------------------------- // Section 3. 
Various versions of the "reduce" function // ------------------------------------------------------------------- // Reduce an entire inactive array, unvectorized template inline typename internal::enable_if::is_vectorized &&is_same::value), typename Func::total_type>::type reduce_inactive(const Expression& rhs) { typename Func::total_type total; Func f; ExpressionSize dims; // Check right hand side is a valid expression if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (dims[0] == 0) { // Return zero if any of these functions applied to an empty // array total = 0; } else { total = f.first_value(); Index n = dims.size(); ExpressionSize i(0); ExpressionSize loc(0); int my_rank; static const int last = E::rank-1; do { i[last] = 0; rhs.set_location(i, loc); // Innermost loop for ( ; i[last] < dims[last]; ++i[last]) { f.accumulate(total, rhs.next_value(loc)); } my_rank = E::rank-1; while (--my_rank >= 0) { if (++i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; } else { break; } } } while (my_rank >= 0); f.finish(total, n); } return total; } // Reduce an entire inactive array, vectorized template inline typename internal::enable_if::is_vectorized &&is_same::value, typename Func::total_type>::type reduce_inactive(const Expression& rhs) { typename Func::total_type total; Func f; ExpressionSize dims; // Check right hand side is a valid expression if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (dims[0] == 0) { // Return zero if any of these functions applied to an empty // array total = 0; } else if (dims[E::rank-1] >= Packet::size*2 && rhs.all_arrays_contiguous()) { // Vectorization is possible Packet ptotal(f.first_value()); Index n = dims.size(); ExpressionSize i(0); ExpressionSize loc(0); int my_rank; static const int last = 
E::rank-1; int iendvec; int istartvec = rhs.alignment_offset(); total = f.first_value(); if (istartvec < 0) { istartvec = iendvec = 0; } else { // Adjust iendvec such that iendvec-istartvec is a multiple // of the packet size iendvec = (dims[last]-istartvec); iendvec -= (iendvec % Packet::size); iendvec += istartvec; } do { i[last] = 0; rhs.set_location(i, loc); // Innermost loop for ( ; i[last] < istartvec; ++i[last]) { f.accumulate(total, rhs.next_value_contiguous(loc)); } for ( ; i[last] < iendvec; i[last] += Packet::size) { f.accumulate(ptotal, rhs.next_packet(loc)); } for ( ; i[last] < dims[last]; ++i[last]) { f.accumulate(total, rhs.next_value_contiguous(loc)); } my_rank = E::rank-1; while (--my_rank >= 0) { if (++i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; } else { break; } } } while (my_rank >= 0); // norm2 cannot use accumulate here or elements will be squared twice //f.accumulate(total, f.accumulate_packet(ptotal)); f.accumulate_packet2(total, ptotal); f.finish(total, n); } else { // Back to unvectorized version total = f.first_value(); Index n = dims.size(); ExpressionSize i(0); ExpressionSize loc(0); int my_rank; static const int last = E::rank-1; do { i[last] = 0; rhs.set_location(i, loc); // Innermost loop for ( ; i[last] < dims[last]; ++i[last]) { f.accumulate(total, rhs.next_value(loc)); } my_rank = E::rank-1; while (--my_rank >= 0) { if (++i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; } else { break; } } } while (my_rank >= 0); f.finish(total, n); } return total; } // Reduce the specified dimension of an inactive array of rank > 1 template inline void reduce_dimension(const Expression& rhs, int reduce_dim, Array& total) { Func f; ExpressionSize dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (dims[0] == 0) { // Return empty array if any of these functions applied to an // empty array total.clear(); } else if 
(reduce_dim >= E::rank) { std::stringstream s; s << "In " << f.name() << "(Expression,dim=" << reduce_dim << "), dim must be less than rank."; throw invalid_dimension(s.str() ADEPT_EXCEPTION_LOCATION); } else { // New array has the same dimensions as the input but with one // of the dimensions removed ExpressionSize new_dims; int jnew = 0; for (int j = 0; j < E::rank; ++j) { if (j != reduce_dim) { new_dims[jnew++] = dims[j]; } } total.resize(new_dims); total = f.first_value(); ExpressionSize i(0); ExpressionSize inew(0); ExpressionSize loc(0); int my_rank; static const int last = E::rank-1; do { i[last] = 0; rhs.set_location(i, loc); // Innermost loop. Note that indexing of total with inew is // not very efficient for high-rank arrays since the // location must be computed from all dimensions each time. if (reduce_dim == last) { for ( ; i[last] < dims[last]; ++i[last]) { f.accumulate(total.get_lvalue(inew), rhs.next_value(loc)); } } else { for ( inew[last-1] = 0; i[last] < dims[last]; ++i[last], ++inew[last-1]) { f.accumulate(total.get_lvalue(inew), rhs.next_value(loc)); } } // Advancing to next innermost loop is somewhat involved // since we have to do something different when we reach the // dimension that is being reduced my_rank = E::rank-1; while (--my_rank >= 0) { ++i[my_rank]; if (my_rank < reduce_dim) { ++inew[my_rank]; if (i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; inew[my_rank] = 0; } else { break; } } else if (my_rank == reduce_dim) { if (i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; } else { break; } } // The following could be a simple "else", but sometimes // the compiler optimizes to the extent that it thinks // inew[-1] will be accessed (even though it won't), // leading to a warning about the array subscript being // out of bounds. Here the compiler knows the index must // be zero or positive. 
else if (my_rank > 0) { ++inew[my_rank-1]; if (i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; inew[my_rank-1] = 0; } else { break; } } } } while (my_rank >= 0); if (f.finish_needed) { f.finish(total, dims[reduce_dim]); } } } // Reduce the entirety of an active array template inline void reduce_active(const Expression& rhs, Active& total) { #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { total.lvalue() = reduce_inactive(rhs); return; } #endif Func f; ExpressionSize dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (dims[0] == 0) { // Return zero if any of these functions applied to an empty // array total = 0; } else { total.set_value(f.first_value()); Index n = dims.size(); ExpressionSize i(0); ExpressionSize loc(0); int my_rank; static const int last = E::rank-1; // Check there is enough space on the operation stack by // working out the cost of all the elements of the array. Note // that the final operation to compute the total at the end is // dealt with separately. ADEPT_ACTIVE_STACK->check_space((E::n_active + Func::extra_element_cost) * n); do { i[last] = 0; rhs.set_location(i, loc); // Innermost loop for ( ; i[last] < dims[last]; ++i[last]) { f.accumulate_active(total, rhs, loc); } my_rank = E::rank-1; while (--my_rank >= 0) { if (++i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; } else { break; } } } while (my_rank >= 0); if (f.active_finish_needed) { f.finish_active(total, n); } } } // Reduce the specified dimension of an active array of rank > 1 template inline void reduce_dimension(const Expression& rhs, int reduce_dim, Array& result) { #ifdef ADEPT_RECORDING_PAUSABLE if (!ADEPT_ACTIVE_STACK->is_recording()) { // This solution requires more shallow copies than are really // needed; could be made more efficient if Array had a member // function to link an pre-constructed active Array to // inactive data. 
Array result_inactive; reduce_dimension(rhs, reduce_dim, result_inactive); Array result_active(result_inactive.data(), result_inactive.storage(), result_inactive.dimensions(), result_inactive.offset()); result >>= result_active; return; } #endif Func f; ExpressionSize dims; if (!rhs.get_dimensions(dims)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } else if (dims[0] == 0) { // Return empty array if any of these functions applied to an // empty array result.clear(); } else if (reduce_dim >= E::rank) { std::stringstream s; s << "In " << f.name() << "(Expression,dim=" << reduce_dim << "), dim must be less than rank."; throw invalid_dimension(s.str() ADEPT_EXCEPTION_LOCATION); } else { // New array has the same dimensions as the input but with one // of the dimensions removed ExpressionSize new_dims; int jnew = 0; for (int j = 0; j < E::rank; ++j) { if (j != reduce_dim) { new_dims[jnew++] = dims[j]; } } result.resize(new_dims); ExpressionSize i(0); ExpressionSize inew(0); ExpressionSize loc(0); int my_rank; Active total; Index n = dims.size(); // Check there is enough space on the operation stack, // including the per-element cost, and an additional cost to // finalize each individual strip of the array. Even though an // additional check is performed at the end of each completed // strip, the total number needs to be anticipated beforehand // (omitting this can cause memory corruption). ADEPT_ACTIVE_STACK->check_space((E::n_active + Func::extra_element_cost) * n + new_dims.size()); do { i[reduce_dim] = 0; // total.set_value(f.first_value()); total = f.first_value(); // Innermost loop. Note that indexing of total with inew is // not very efficient for high-rank arrays since the // location must be computed from all dimensions each time. 
for ( ; i[reduce_dim] < dims[reduce_dim]; ++i[reduce_dim]) { rhs.set_location(i, loc); f.accumulate_active(total, rhs, loc); } if (f.active_finish_needed) { f.finish_active(total, dims[reduce_dim]); } result.get_lvalue(inew) = total; my_rank = E::rank; while (--my_rank >= 0) { if (my_rank == reduce_dim) { continue; } ++i[my_rank]; if (my_rank < reduce_dim) { ++inew[my_rank]; if (i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; inew[my_rank] = 0; } else { break; } } else if (my_rank == reduce_dim) { if (i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; } else { break; } } else { ++inew[my_rank-1]; if (i[my_rank] >= dims[my_rank]) { i[my_rank] = 0; inew[my_rank-1] = 0; } else { break; } } } } while (my_rank >= 0); } } } // ------------------------------------------------------------------- // Section 4. Implement the functions // ------------------------------------------------------------------- // Implement sum(x), sum(x,dim), mean(x), mean(x,dim) etc. // Different versions of the "reduce" function are called depending // on whether "x" is active and whether "dim" is present. 
#define DEFINE_REDUCE_FUNCTION(NAME, CLASSNAME) \ /* function(inactive) */ \ template \ inline \ typename internal::enable_if::type \ NAME(const Expression& rhs) { \ return internal::reduce_inactive >(rhs); \ } \ \ /* function(active) */ \ template \ inline \ typename internal::enable_if >::type \ NAME(const Expression& rhs) { \ Active result; \ internal::reduce_active >(rhs, result); \ return result; \ } \ \ /* function(inactive[rank=1], dim) */ \ template \ inline \ typename internal::enable_if::type \ NAME(const Expression& rhs, int dim) { \ if (dim != 0) { \ throw invalid_dimension("Two-argument reduce function applied to vector must have zero as second argument" \ ADEPT_EXCEPTION_LOCATION); \ } \ return internal::reduce_inactive >(rhs); \ } \ \ /* function(active[rank=1], dim) */ \ template \ inline \ typename internal::enable_if >::type \ NAME(const Expression& rhs, int dim) { \ if (dim != 0) { \ throw invalid_dimension("Two-argument reduce function applied to vector must have zero as second argument" \ ADEPT_EXCEPTION_LOCATION); \ } \ Active result; \ internal::reduce_active >(rhs, result); \ return result; \ } \ \ /* function(inactive[rank>1], dim) */ \ /* function(active[rank>1], dim) */ \ template \ inline \ typename internal::enable_if<(E::rank > 1), \ Array >::type \ NAME(const Expression& rhs, int dim) { \ Array result; \ internal::reduce_dimension >(rhs, dim, result); \ return result; \ } DEFINE_REDUCE_FUNCTION(sum, Sum) DEFINE_REDUCE_FUNCTION(mean, Mean) DEFINE_REDUCE_FUNCTION(product, Product) DEFINE_REDUCE_FUNCTION(maxval, MaxVal) DEFINE_REDUCE_FUNCTION(minval, MinVal) DEFINE_REDUCE_FUNCTION(norm2, Norm2) #undef DEFINE_REDUCE_FUNCTION // Implement all(x), all(x,dim), any(x) and any(x,dim). Fewer // possibilities this time as no active versions. 
#define DEFINE_BOOL_REDUCE_FUNCTION(NAME, CLASSNAME) \ template \ inline bool NAME(const Expression& rhs) \ { return internal::reduce_inactive(rhs); } \ \ template \ inline \ Array \ NAME(const Expression& rhs, int dim) { \ Array result; \ internal::reduce_dimension(rhs, dim, result); \ return result; \ } DEFINE_BOOL_REDUCE_FUNCTION(all, All) DEFINE_BOOL_REDUCE_FUNCTION(any, Any) #undef DEFINE_BOOL_REDUCE_FUNCTION // count(x) and count(x,dim) are slightly different as they return // Index template inline Index count(const Expression& rhs) { return internal::reduce_inactive(rhs); } template inline Array count(const Expression& rhs, int dim) { Array result; internal::reduce_dimension(rhs, dim, result); return result; } // ------------------------------------------------------------------- // Section 5. diag_vector // ------------------------------------------------------------------- // diag_vector(A,offdiag), where A is a 2D array, returns the // diagonal indexed by "offdiag" as a 1D array pointing to the // original data, or the main diagonal if offdiag is missing. Can be // used as an lvalue. template Array<1,Type,IsActive> diag_vector(Array<2,Type,IsActive>& A, Index offdiag = 0) { ExpressionSize<2> dims = A.dimensions(); ExpressionSize<2> offset = A.offset(); ExpressionSize<1> new_dim, new_offset; new_offset[0] = offset[0]+offset[1]; if (offdiag >= 0) { new_dim[0] = std::min(dims[0], dims[1]-offdiag); return Array<1,Type,IsActive>(A.data()+offdiag*offset[1], A.storage(), new_dim, new_offset); } else { new_dim[0] = std::min(dims[0]+offdiag, dims[1]); return Array<1,Type,IsActive>(A.data()-offdiag*offset[0], A.storage(), new_dim, new_offset); } } // diag_vector(A,offdiag), where A is a 2D expression, returns the // diagonal indexed by "offdiag" as a 1D array, or the main diagonal // if offdiag is missing. Cannot be used as an lvalue. 
template typename internal::enable_if >::type
diag_vector(const Expression& arg, Index offdiag = 0) {
  // Inactive version: copies the values of the requested diagonal of
  // the rank-2 expression into a new vector.  A positive offdiag
  // selects a diagonal starting at row 0, column offdiag; a negative
  // offdiag selects one starting at row -offdiag, column 0.  Throws
  // size_mismatch if the dimensions of arg cannot be determined.
  ExpressionSize<2> dims;
  if (!arg.get_dimensions(dims)) {
    std::string str;
    str += "Array size mismatch in ";
    str += arg.expression_string();
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  // Row and column of the first element of the requested diagonal;
  // exactly one of them is non-zero when offdiag is non-zero
  const Index row0 = (offdiag >= 0) ? 0 : -offdiag;
  const Index col0 = (offdiag >= 0) ? offdiag : 0;
  // Number of elements on the diagonal: min(dims[0], dims[1]-offdiag)
  // for offdiag >= 0 and min(dims[0]+offdiag, dims[1]) otherwise
  const Index new_dim = std::min(dims[0]-row0, dims[1]-col0);
  ExpressionSize<2> i;
  ExpressionSize ind;
  Array<1,Type,E::is_active> v(new_dim);
  for (Index j = 0; j < new_dim; ++j) {
    i[0] = row0+j;
    i[1] = col0+j;
    arg.set_location(i, ind);
    v(j) = arg.next_value(ind);
  }
  return v;
}

// Active version of diag_vector for a 2D expression: as well as
// copying the values of the requested diagonal, the gradient of each
// element is recorded on the active stack.
template typename internal::enable_if >::type
diag_vector(const Expression& arg, Index offdiag = 0) {
  ExpressionSize<2> dims;
  if (!arg.get_dimensions(dims)) {
    std::string str;
    str += "Array size mismatch in ";
    str += arg.expression_string();
    throw size_mismatch(str ADEPT_EXCEPTION_LOCATION);
  }
  // Row and column of the first element of the requested diagonal;
  // for negative offdiag the diagonal starts at row -offdiag of
  // column 0
  const Index row0 = (offdiag >= 0) ? 0 : -offdiag;
  const Index col0 = (offdiag >= 0) ? offdiag : 0;
  const Index new_dim = std::min(dims[0]-row0, dims[1]-col0);
  ExpressionSize<2> i;
  ExpressionSize ind;
  Array<1,Type,E::is_active> v(new_dim);
  for (Index j = 0; j < new_dim; ++j) {
    i[0] = row0+j;
    i[1] = col0+j;
    arg.set_location(i, ind);
    // The right hand side puts its gradient on the operation stack,
    // then the corresponding element of v is pushed as the left
    // hand side
    v.data()[j] = arg.next_value_and_gradient(*ADEPT_ACTIVE_STACK,ind);
    ADEPT_ACTIVE_STACK->push_lhs(v.gradient_index()+j);
  }
  return v;
}

// diag_matrix(v), where v is a 1D expression, returns a
// DiagMatrix whose diagonal is a copy of v. Cannot be used as an
// lvalue.
template typename internal::enable_if, E::is_active> >::type diag_matrix(const Expression& arg) { Array<1,Type,E::is_active> v = arg; return v.diag_matrix(); } // ------------------------------------------------------------------- // Section 6. dot_product // ------------------------------------------------------------------- template typename internal::enable_if::type, L::is_active || R::is_active>::type>::type dot_product(const Expression& l, const Expression& r) { return sum(l*r); } // ------------------------------------------------------------------- // Section 7. minloc // ------------------------------------------------------------------- template inline typename internal::enable_if::type minloc(const Expression& rhs) { ExpressionSize<1> length; // Check the argument of the function is a valid expression if (!rhs.get_dimensions(length)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } // Length of the rank-1 expression Index& len = length[0]; Type running_min = internal::numeric_limits::max_inf(); Index running_loc = 0; ExpressionSize<1> coords(0); ExpressionSize loc; rhs.set_location(coords, loc); // Loop over all values in the expression for (Index i = 0; i < len; i++) { Type val = rhs.next_value(loc); if (val < running_min) { running_min = val; running_loc = i; } } return running_loc; } // ------------------------------------------------------------------- // Section 8. 
maxloc // ------------------------------------------------------------------- template inline typename internal::enable_if::type maxloc(const Expression& rhs) { ExpressionSize<1> length; // Check the argument of the function is a valid expression if (!rhs.get_dimensions(length)) { std::string str = "Array size mismatch in " + rhs.expression_string() + "."; throw size_mismatch(str ADEPT_EXCEPTION_LOCATION); } // Length of the rank-1 expression Index& len = length[0]; Type running_max = internal::numeric_limits::min_inf(); Index running_loc = 0; ExpressionSize<1> coords(0); ExpressionSize loc; rhs.set_location(coords, loc); // Loop over all values in the expression for (Index i = 0; i < len; i++) { Type val = rhs.next_value(loc); if (val > running_max) { running_max = val; running_loc = i; } } return running_loc; } } // End namespace adept #endif ================================================ FILE: include/adept/scalar_shortcuts.h ================================================ /* shortcuts.h -- Definitions of "shortcut" typedefs for scalar types Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
*/ #ifndef AdeptScalarShortcuts_H #define AdeptScalarShortcuts_H #include #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION // First the case when automatic differentiation is ON #include namespace adept { typedef Active aReal; typedef Active afloat; typedef Active adouble; typedef Active > aComplex; typedef Active > aComplexFloat; typedef Active > aComplexDouble; inline Real value(Real x) { return x; } } // End namespace adept #else // Second the case when automatic differentiation is OFF #include namespace adept { typedef Real aReal; typedef float afloat; typedef double adouble; typedef std::complex aComplex; typedef std::complex aComplexFloat; typedef std::complex aComplexDouble; // Normally value(x) returns the inactive part of x, so if x is // inactive we simply return a constant reference to x template inline const T& value(const T& x) { return x; } inline Real value(Real x) { return x; } } // End namespace adept #endif #endif ================================================ FILE: include/adept/settings.h ================================================ /* settings.h -- View/change the overall Adept settings Copyright (C) 2016-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptSettings_H #define AdeptSettings_H 1 #include namespace adept { // ------------------------------------------------------------------- // Get compiler settings // ------------------------------------------------------------------- // Return the version of Adept at compile time std::string version(); // Return the compiler used to compile the Adept library (e.g. "g++ 4.3.2") std::string compiler_version(); // Return the compiler flags used when compiling the Adept library // (e.g. "-Wall -g -O3") std::string compiler_flags(); // Return a multi-line string listing numerous aspects of the way // Adept has been configured. 
std::string configuration(); // Was the library compiled with matrix multiplication support (from // BLAS)? bool have_matrix_multiplication(); // Was the library compiled with linear algebra support (e.g. inv // and solve from LAPACK) bool have_linear_algebra(); // ------------------------------------------------------------------- // Get/set number of threads for array operations // ------------------------------------------------------------------- // Get the maximum number of threads available for BLAS operations int max_blas_threads(); // Set the maximum number of threads available for BLAS operations // (zero means use the maximum sensible number on the current // system), and return the number actually set. Note that OpenBLAS // uses pthreads and the Jacobian calculation uses OpenMP - this can // lead to inefficient behaviour so if you are computing Jacobians // then you may get better performance by setting the number of // array threads to one. int set_max_blas_threads(int n); } // End namespace adept #endif ================================================ FILE: include/adept/solve.h ================================================ /* solve.h -- Solve systems of linear equations Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
*/ #ifndef AdeptSolve_H #define AdeptSolve_H 1 #include #include #include namespace adept { // ------------------------------------------------------------------- // Solve Ax = b for general square matrix A // ------------------------------------------------------------------- template Array<1,T,false> solve(const Array<2,T,false>& A, const Array<1,T,false>& b); // ------------------------------------------------------------------- // Solve AX = B for general square matrix A and rectangular matrix B // ------------------------------------------------------------------- template Array<2,T,false> solve(const Array<2,T,false>& A, const Array<2,T,false>& B); // ------------------------------------------------------------------- // Solve Ax = b for symmetric square matrix A // ------------------------------------------------------------------- template Array<1,T,false> solve(const SpecialMatrix,false>& A, const Array<1,T,false>& b); // ------------------------------------------------------------------- // Solve AX = B for symmetric square matrix A // ------------------------------------------------------------------- template Array<2,T,false> solve(const SpecialMatrix,false>& A, const Array<2,T,false>& B); // ------------------------------------------------------------------- // Solve AX = B for symmetric square matrices A and B // ------------------------------------------------------------------- // Simply copy B into a general dense matrix template inline Array<2,T,false> solve(const SpecialMatrix,false>& A, const SpecialMatrix,false>& B) { Array<2,T,false> B_array = B; return solve(A,B_array); } // ------------------------------------------------------------------- // Solve Ax = b for general expressions // ------------------------------------------------------------------- template typename internal::enable_if::value && internal::matrix_op_defined::value, Array<1,typename internal::promote::type,false> >::type solve(const Expression& l, const Expression& r) { 
typedef typename internal::promote::type PType; Array<2,PType,false> left = l.cast(); Array<1,PType,false> right = r.cast(); return solve(left,right); } // ------------------------------------------------------------------- // Solve AX = B for general expressions // ------------------------------------------------------------------- template typename internal::enable_if::value && internal::matrix_op_defined::value, Array<2,typename internal::promote::type,false> >::type solve(const Expression& l, const Expression& r) { typedef typename internal::promote::type PType; Array<2,PType,false> left = l.cast(); Array<2,PType,false> right = r.cast(); return solve(left,right); } } #endif ================================================ FILE: include/adept/spread.h ================================================ /* spread.h -- Spread an array into an additional dimension Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. 
*/ #ifndef AdeptSpread_H #define AdeptSpread_H #include namespace adept { namespace internal { // Expression representing the spread of an array into an // additional dimension template class Spread : public Expression > { typedef Array ArrayType; public: // Static data static const int rank = E::rank+1; static const bool is_active = E::is_active; static const int n_active = ArrayType::n_active; static const int n_scratch = 0; static const int n_arrays = ArrayType::n_arrays; // Currently not vectorizable if the final dimension is the // spread dimension because the current design always has the // array index increasing static const bool is_vectorizable = (SpreadDim != E::rank); protected: const ArrayType array; ExpressionSize dims; Index n; public: Spread(const Expression& e, Index n_) : array(e.cast()), n(n_) { for (int i = 0; i < SpreadDim; ++i) { dims[i] = array.dimension(i); } dims[SpreadDim] = n_; for (int i = SpreadDim+1; i < rank; ++i) { dims[i] = array.dimension(i-1); } // Communicate empty array if n == 0 if (n_ == 0) { dims[0] = 0; } } bool get_dimensions_(ExpressionSize& dim) const { dim = dims; return true; } std::string expression_string_() const { std::stringstream s; s << "spread<" << SpreadDim << ">(" << array.expression_string() << "," << n << ")"; return s.str(); } bool is_aliased_(const Type* mem1, const Type* mem2) const { return false; } bool all_arrays_contiguous_() const { return array.all_arrays_contiguous_(); } bool is_aligned_() const { return array.is_aligned_(); } template int alignment_offset_() const { return array.template alignment_offset_(); } // Do not implement value_with_len_ // Advance only if the spread dimension is not the last template void advance_location_(ExpressionSize& loc) const { // If false this if statement should be optimized away if (SpreadDim < rank-1) { array.template advance_location_(loc); } } template Type value_at_location_(const ExpressionSize& loc) const { return array.template value_at_location_(loc); } 
template Type value_at_location_store_(const ExpressionSize& loc, ScratchVector& scratch) const { return array.template value_at_location_(loc); } template Type value_stored_(const ExpressionSize& loc, const ScratchVector& scratch) const { return array.template value_at_location_(loc); } template Packet packet_at_location_(const ExpressionSize& loc) const { return packet_at_location_local_(loc); } protected: // Specializing for the case when the final dimension is the // final dimension of the wrapped array template typename enable_if >::type packet_at_location_local_(const ExpressionSize& loc) const { return array.template packet_at_location_(loc); } // Specializing for the case when the final dimension is to be // "spread". The following does not work because the array // location is incremented for packets when we really want it to // always point to the start of a row. It is deactivated by // is_vectorizable_ (above). template typename enable_if >::type packet_at_location_local_(const ExpressionSize& loc) const { return Packet(array.template value_at_location_(loc)); } public: template void set_location_(const ExpressionSize& i, ExpressionSize& index) const { ExpressionSize i_array(0); int j = 0; for ( ; j < SpreadDim; ++j) { i_array[j] = i[j]; } for ( ; j < rank-1; ++j) { i_array[j] = i[j+1]; } array.template set_location_(i_array, index); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch) const { array.template calc_gradient_(stack,loc,scratch); } template void calc_gradient_(Stack& stack, const ExpressionSize& loc, const ScratchVector& scratch, MyType multiplier) const { array.template calc_gradient_(stack,loc, scratch,multiplier); } }; } // Define spread function applied to an expression template typename internal::enable_if<(SpreadDim >= 0 && SpreadDim <= E::rank), internal::Spread >::type spread(const Expression& e, Index n) { return internal::Spread(e,n); } /* // If "spread" is applied to a scalar, we 
expand it to a Vector of // the same type template typename internal::enable_if::value, Array<1,Type,false> >::type spread(const Type& e, Index n) { Array<1,Type,false> arr(n); arr = e; return arr; } */ } #endif ================================================ FILE: include/adept/store_transpose.h ================================================ /* store_transpose.h -- Store the transpose of a vector of Packets Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. Vectorization of active expressions involves storage of the gradients in an object of type ScratchVector>, which we need to transpose when placing on the stack. */ #ifndef StoreTranspose_H #define StoreTranspose_H 1 #include #include namespace adept { namespace internal { // Unvectorized version template store_transpose(ScratchVector >& src, Type* dest) { for (int i = 0; i < Len; ++i) { union { Packet::intrinsic_type packet; Type[Packet::size] array; } packet = src[i]; for (int j = 0; j < Packet::size; ++j) { dest[j*Len] = array[j]; } ++dest; } } } } #endif ================================================ FILE: include/adept/traits.h ================================================ /* traits.h -- Traits used to support array/automatic differentiation expressions Copyright (C) 2012-2014 University of Reading Copyright (C) 2015-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptTraits_H #define AdeptTraits_H 1 #include #include #include #include #ifdef ADEPT_CXX11_FEATURES #include #endif namespace adept { // Forward declaration of "Active" template class Active; // All traits are in the adept::internal namespace. Note that many // of these are part of the STL in C++11 but are needed so that // Adept can be used with C++98 compilers. namespace internal { // ----- CONTENTS ----- // 1. ADEPT_STATIC_ASSERT // 2. enable_if // 3. if_then_else // 4. 
is_not_expression // 5. is_complex // 6. is_active // 7. is_array // 8. is_scalar_int // 9. all_scalar_ints // 10. underlying_real // 11. underlying_passive // 12. promote // 13. rank_compatible // 14. is_same // 15. remove_reference // 16. initializer_list_rank // 17. matrix_op_defined // 18. is_floating_point // -------------------- // --------------------------------------------------------------------- // 1. ADEPT_STATIC_ASSERT // --------------------------------------------------------------------- // Heavily templated C++ code as in the Adept library can produce // very long and cryptic compiler error messages. This macro is // useful to check for conditions that should not happen. It check // a bool known at compile time is true, otherwise fail to compile // with a message that is hopefully understandable. // E.g. ADEPT_STATIC_ASSERT(0 > 1, ZERO_IS_NOT_GREATER_THAN_ONE) // would fail at compile time with a message containing // ERROR_ZERO_IS_NOT_GREATER_THAN_ONE, which should hopefully // stand out even in a long error message. // Helper class template struct compile_time_check { typedef int STATIC_ASSERTION_HAS_FAILED; }; template<> struct compile_time_check { }; // Define the macro in which a struct is defined that inherits // from compile_time_check #if defined(__GNUC__) && !defined(__INTEL_COMPILER) #pragma GCC diagnostic ignored "-Wpragmas" #pragma GCC diagnostic ignored "-Wunused-local-typedefs" #pragma GCC diagnostic warning "-Wpragmas" #endif #define ADEPT_STATIC_ASSERT(condition, msg) \ do { struct ERROR_##msg : public ::adept::internal::compile_time_check<(condition)> { }; \ typedef typename ERROR_##msg ::STATIC_ASSERTION_HAS_FAILED type; \ } while (0) // --------------------------------------------------------------------- // 2. 
enable_if // --------------------------------------------------------------------- // To enable a function "Type function()" only if CONDITION is // true, replace "Type" in the function declaration with "typename // enable_if::type" template struct enable_if { }; // Partial specialization for true. template struct enable_if { typedef T type; }; // --------------------------------------------------------------------- // 3. if_then_else // --------------------------------------------------------------------- // "if_then_else::type" resolves to YES if // CONDITION is "true", NO otherwise. A limitation is that both Y // and N must be valid types template struct if_then_else { typedef Y type; }; template struct if_then_else { typedef N type; }; // --------------------------------------------------------------------- // 4. is_not_expression // --------------------------------------------------------------------- // The following enables us to provide functions that work only on // types *not* derived from the Expression struct: // "is_not_expression::value" is "true" if E is not an // expression and "false" otherwise template struct is_not_expression { private: typedef char yes; typedef struct { char array[2]; } no; template static yes test(typename C::_adept_expression_flag*); template static no test(...); public: static const bool value = sizeof(test(0)) != sizeof(yes); }; // --------------------------------------------------------------------- // 5. 
is_complex // --------------------------------------------------------------------- // Test for complex numbers: "is_complex::value" is "true" if S // is complex, "false" otherwise template struct is_complex { static const bool value = false; }; template <> struct is_complex > { static const bool value = true; }; template <> struct is_complex > { static const bool value = true; }; template <> struct is_complex > { static const bool value = true; }; // --------------------------------------------------------------------- // 6. is_active // --------------------------------------------------------------------- // Test for active numbers: "is_active::value" is "true" if S // is active, "false" otherwise. // Then the default case for non-expressions returns false template struct expr_cast; // Forward declaration template struct is_active { }; template struct is_active::value>::type> { static const bool value = false; }; // Expressions define a static const bool called "is_active" template struct is_active::value>::type> { static const bool value = expr_cast::is_active; }; // --------------------------------------------------------------------- // 7. is_array // --------------------------------------------------------------------- /* // "is_array::value" is "true" if E is an array expression and // "false" otherwise. The default case for non-expressions // returns false template struct is_array { }; template struct is_array::value>::type> { static const bool value = false; }; // Expressions define a static const bool called "is_array" template struct is_array::value>::type> { static const bool value = T::is_array; }; */ // --------------------------------------------------------------------- // 8. 
is_scalar_int // --------------------------------------------------------------------- // Return whether template argument is of integer type, or is a // 0-dimensional expression of integer type template struct is_scalar_int { }; template struct is_scalar_int::value>::type> { static const bool value = std::numeric_limits::is_integer; static const int count = value; }; template struct is_scalar_int::value>::type> { static const bool value = std::numeric_limits::is_integer && expr_cast::rank == 0; static const int count = value; }; // --------------------------------------------------------------------- // 9. all_scalar_ints // --------------------------------------------------------------------- // all_scalar_ints::value returns true if I[0] to // I[Rank-1] are all scalar integers // First define a "null" type struct null_type { }; template struct is_null_type { static const bool value = false; static const int count = 0; }; template <> struct is_null_type{ static const bool value = true; static const int count = 1; }; template struct all_scalar_ints { static const bool value = (Rank == (is_scalar_int::count +is_scalar_int::count +is_scalar_int::count +is_scalar_int::count +is_scalar_int::count +is_scalar_int::count +is_scalar_int::count)); }; // --------------------------------------------------------------------- // 10. 
underlying_real // --------------------------------------------------------------------- // Return the underlying real type for a complex argument: // "underlying_real::type returns T if S is of type // std::complex, or returns S if it is not complex /* template struct underlying_real { private: template struct _underlying_real { typedef S type; }; template struct _underlying_real { typedef typename S::type type; }; public: typedef typename _underlying_real::value, T>::type type; }; */ template struct underlying_real { typedef T type; }; template struct underlying_real > { typedef T type; }; // --------------------------------------------------------------------- // 11. underlying_passive // --------------------------------------------------------------------- // Return the underlying passive type for an active argument: // "underlying_passive::type returns T if S is of type // adept::Active, or returns S if it is not active. template struct underlying_passive { private: template struct _underlying_passive { typedef S type; }; template struct _underlying_passive { typedef typename S::type type; }; public: typedef typename _underlying_passive::value, T>::type type; }; // --------------------------------------------------------------------- // 12. promote // --------------------------------------------------------------------- // "promote::type" returns the type that a binary operation // (e.g. multiplication) between types L and R should result in. // Note that "complexity" and "precision" are promoted separately, // so double + std::complex will result in an object of // type std::complex >. 
template struct promote { private: template struct promote_primitive { static const bool A_bigger_than_B = (sizeof(A) > sizeof(B)); static const bool A_float_B_int = (!std::numeric_limits::is_integer) && std::numeric_limits::is_integer; static const bool A_int_B_float = std::numeric_limits::is_integer && (!std::numeric_limits::is_integer); static const bool prefer_float = A_float_B_int || A_int_B_float; typedef typename if_then_else::type float_type; typedef typename if_then_else::type biggest_type; typedef typename if_then_else::type type; }; typedef typename promote_primitive< typename underlying_real::type>::type, typename underlying_real::type>::type>::type real; typedef typename if_then_else::value || is_complex::value, std::complex, real>::type complex_type; public: typedef typename if_then_else::value || is_active::value, adept::Active, complex_type>::type type; }; // If ever the template arguments are the same // (e.g. Packet), we simply return this type template struct promote { typedef T type; }; // --------------------------------------------------------------------- // 13. rank_compatible // --------------------------------------------------------------------- // Check that an array of rank LRank could enter an operation // (e.g. addition) with an array of rank RRank: the two ranks must // either be the same, or either can be zero template struct rank_compatible { static const bool value = (LRank == RRank || LRank == 0 || RRank == 0); }; // --------------------------------------------------------------------- // 14. is_same // --------------------------------------------------------------------- // Compare two types to see if they're the same template struct is_same { static const bool value = false; }; template struct is_same { static const bool value = true; }; // --------------------------------------------------------------------- // 15. 
remove_reference // --------------------------------------------------------------------- // Remove reference from a type if present template struct remove_reference { typedef T type; }; template struct remove_reference { typedef T type; }; // --------------------------------------------------------------------- // 16. initializer_list_rank // --------------------------------------------------------------------- #ifdef ADEPT_CXX11_FEATURES // initializer_list_rank::value returns 0 if T is not a // std::initializer_list, otherwise it returns the number of nested // std::initializer_list's template struct is_initializer_list { static const bool value = false; }; template struct is_initializer_list > { static const bool value = true; }; template struct initializer_list_rank { }; template struct initializer_list_rank::value>::type> { typedef T type; static const int value = 0; }; template struct initializer_list_rank, typename enable_if::value>::type> { typedef T type; static const int value = 1; }; template struct initializer_list_rank, typename enable_if::value>::type> { typedef typename initializer_list_rank::type type; static const int value = 1 + initializer_list_rank::value; }; #endif // --------------------------------------------------------------------- // 17. matrix_op_defined // --------------------------------------------------------------------- // Return true if a type is float or double, false otherwise template struct matrix_op_defined { static const bool value = false; }; template <> struct matrix_op_defined { static const bool value = true; }; template <> struct matrix_op_defined { static const bool value = true; }; // --------------------------------------------------------------------- // 18. 
is_floating_point // --------------------------------------------------------------------- template struct is_floating_point { static const bool value = false; }; template <> struct is_floating_point { static const bool value = true; }; template <> struct is_floating_point { static const bool value = true; }; template <> struct is_floating_point { static const bool value = true; }; } // End namespace internal } // End namespace adept #endif ================================================ FILE: include/adept/vector_utilities.h ================================================ /* vector_utilities.h -- Vector utility functions Copyright (C) 2016 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptVectorUtilities_H #define AdeptVectorUtilities_H #include namespace adept { Array<1,Real,false> linspace(Real x1, Real x2, Index n); } #endif ================================================ FILE: include/adept/where.h ================================================ /* where.h -- Support for Fortran-90-like "where" construct Copyright (C) 2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. Consider the following: A.where(B) = C; A.where(B) = either_or(C, D); where A is an Array, B is a boolean expression, and C and D are expressions, and the arrays and expressions have the same rank and size, except that C and or D may have rank zero. The first line has the effect of setting every element of A for which B is true to the corresponding value in C. The second line does this but for elements where B is false it sets A instead to D. */ #ifndef AdeptWhere_H #define AdeptWhere_H 1 #include #include namespace adept { namespace internal { // --------------------------------------------------------------------- // Section 1. 
EitherOr object returned by either_or function // --------------------------------------------------------------------- template class EitherOr { public: typedef bool _adept_either_or_flag; EitherOr(const C& c, const D& d) : either_(c), or_(d) { } const C& value_if_true() const { return either_; } const D& value_if_false() const { return or_; } protected: const C& either_; const D& or_; }; template struct is_not_either_or { private: typedef char yes; typedef struct { char array[2]; } no; template static yes test(typename C::_adept_either_or_flag*); template static no test(...); public: static const bool value = sizeof(test(0)) != sizeof(yes); }; // --------------------------------------------------------------------- // Section 2. Where class returned by A.where(B) // --------------------------------------------------------------------- template class Where { public: Where(A& a, const B& b) : array_(a), bool_expr_(b) { } template typename enable_if::value, Where&>::type operator=(const C& c) { array_.assign_conditional(bool_expr_, c); return *this; } // With either_or on the right-hand-side: this implementation // could be faster if bool_expr was not evaluated twice template typename enable_if::value, Where&>::type operator=(const C& c) { array_.assign_conditional(!const_cast(bool_expr_), c.value_if_false()); array_.assign_conditional(bool_expr_, c.value_if_true()); return *this; } #define ADEPT_WHERE_OPERATOR(EQ_OP, OP) \ template \ typename enable_if::value, Where&>::type \ EQ_OP(const C& c) { \ array_.assign_conditional(bool_expr_, noalias(*this) OP c); \ return *this; \ } \ template \ typename enable_if::value, Where&>::type \ EQ_OP(const C& c) { \ array_.assign_conditional(!const_cast(bool_expr_), \ noalias(*this) OP c.value_if_false()); \ array_.assign_conditional(bool_expr_, \ noalias(*this) OP c.value_if_true()); \ return *this; \ } ADEPT_WHERE_OPERATOR(operator+=, +) ADEPT_WHERE_OPERATOR(operator-=, -) ADEPT_WHERE_OPERATOR(operator*=, *) 
ADEPT_WHERE_OPERATOR(operator/=, /) #undef ADEPT_WHERE_OPERATOR protected: A& array_; const B& bool_expr_; }; } // end namespace internal template internal::EitherOr either_or(const C& c, const D& d) { return internal::EitherOr(c, d); } } // end namespace adept #endif ================================================ FILE: include/adept.h ================================================ /* adept.h -- Header file for basic scalar functionality of Adept automatic differentiation library Copyright (C) 2015-2016 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef Adept_H #define Adept_H 1 #include #include #include #include #include #endif ================================================ FILE: include/adept_arrays.h ================================================ /* adept_arrays.h -- Header file for array functionality of Adept automatic differentiation library Copyright (C) 2014-2015 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptArrays_H #define AdeptArrays_H 1 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif ================================================ FILE: include/adept_fortran.h ================================================ /* adept_fortran.h -- Interoperability between Adept and Fortran-90 arrays Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. Fortran-90 introduced multi-dimensional arrays with essentially the same basic capabilities as passive Adept arrays, including the ability to index strided data in memory. The improved interoperability features of Fortran 2018 enable Fortran array data to be passed to and from C/C++. This header file enables passive Adept arrays to be passed to and from Fortran. 
PASSING ARRAYS FROM FORTRAN TO C++ A C++ subroutine callable from Fortran could be declared in C++ as: extern "C" void adept_subroutine(adept::FortranArray* int_arr, adept::FortranArray* dbl_arr); where FortranArray is a C++ class wrapping the CFI_cdesc_t type that contains the Fortran array descriptor. Within the definition of this function, Adept arrays may be associated with the Fortran data as follows: adept::intMatrix imat; adept::associate(imat, int_arr); imat >>= int_arr; // Alternative form In this example, the matrix of integers "imat" shares its data with the Fortran array int_arr. An exception will be thrown if the Fortran array is not of type integer and rank 2. Note that the array indexing of imat will be in the standard C/C++ convention, zero-based and with the final index varying fastest as memory is traversed. This is opposite to the way the array is accessed in Fortran. The ">>=" provides a more succinct way to do the same thing. Consider the following: adept::Matrix dmat; adept::associate(dmat, dbl_arr, true); Here, the third argument "true" indicates that the array strides of dmat are to be configured so that the array indices are the same as in Fortran (although still zero based). This will impede optimization of some array expressions using dmat, since the second dimension of dmat will not be contiguous in memory, and this is the dimension that Adept attempts to vectorize. 
PASSING ARRAYS FROM ADEPT TO FORTRAN A Fortran-implemented subroutine could be declared in C++ as follows: extern "C" void fort_subroutine(adept::FortranArray* int_arr, adept::FortranArray* dbl_arr); To call this routine from C++, passing Adept arrays "imat" and "dmat" as the arguments, we can do simply: fort_subroutine(FortranArray(imat), FortranArray(dmat)); */ #ifndef AdeptFortran_H #define AdeptFortran_H 1 #include #include // GNU defines CFI_type_Bool as "_Bool", but this is only available in // C99, not C++, so we make it an alias for C++'s "bool" #ifdef __GNUC__ #include #endif // Load the Fortran array interface into the global namespace #include namespace adept { namespace internal { // Helper types such that cfi_type::type returns the integer // type of "X", or fails to compile if it is not possible to send // an array of type X to Fortran template struct cfi_type { }; // Fails to compile if attempt to access "type" template <> struct cfi_type { static const CFI_type_t type = CFI_type_signed_char; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_short; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_int; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_long; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_long_long; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_Bool; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_float; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_double; }; template <> struct cfi_type { static const CFI_type_t type = CFI_type_long_double; }; // Complex types: each precision (float, double, long double) maps to // its own CFI complex type code; the double-precision case must not // share the long double code or is_type() rejects Fortran double // complex arrays template <> struct cfi_type > { static const CFI_type_t type = CFI_type_float_Complex; }; template <> struct cfi_type > { static const CFI_type_t type = CFI_type_double_Complex; }; template <> struct cfi_type > { static const CFI_type_t type = CFI_type_long_double_Complex; }; } // This class is essentially a wrapper
around the CFI_cdesc_t type // which stores a Fortran array descriptor which could be for an // array of any rank or type class FortranArray { protected: // Data: the Fortran array descriptor CFI_cdesc_t type, but the // version configured for the maximum allowable Fortran rank CFI_CDESC_T(CFI_MAX_RANK) ad; public: // This class either exists as a pointer to a Fortran array passed // in from a Fortran routine, or as an object pointing to an Adept // array that is about to be passed into a Fortran routine. // Therefore it can only be constructed from an existing Adept // array. FortranArray() = delete; // Initialize from Adept array. By default, the dimensions will // need to be accessed in opposite order in Fortran than in // C++/Adept, reflecting the default column-major array access of // the former and row-major array access of the latter. But by // providing preserve_dim_order=true, the dimension access order // will be preserved between the two. template FortranArray(adept::Array& a, bool preserve_dim_order = false) { init(a, preserve_dim_order); } // No way to ensure that Fortran cannot modify an array, // unfortunately, so we need to cast away the const-ness template FortranArray(const adept::Array& a, bool preserve_dim_order = false) { init(const_cast&>(a), preserve_dim_order); } protected: // Constructor implementation: initialize CFI_cdesc_t elements // from Adept array template void init(adept::Array& a, bool preserve_dim_order) { ADEPT_STATIC_ASSERT(Rank <= CFI_MAX_RANK, ARRAY_RANK_EXCEEDS_FORTRAN_MAXIMUM); ad.base_addr = static_cast(a.data()); ad.elem_len = sizeof(Type); ad.version = CFI_VERSION; ad.rank = Rank; ad.attribute = CFI_attribute_other; ad.type = internal::cfi_type::type; if (!preserve_dim_order) { for (int irank = 0; irank < Rank; ++irank) { ad.dim[irank].lower_bound = 0; ad.dim[irank].extent = a.dimension(Rank-irank-1); ad.dim[irank].sm = a.offset(Rank-irank-1)*sizeof(Type); } } else { for (int irank = 0; irank < Rank; ++irank) { 
ad.dim[irank].lower_bound = 0; ad.dim[irank].extent = a.dimension(irank); ad.dim[irank].sm = a.offset(irank)*sizeof(Type); } } } public: // Query the rank and type of the Fortran array int rank() const { return ad.rank; } int type_code() const { return ad.type; } // Return "true" if the rank or type equal the template parameters // Rank and Type template bool is_rank() const { return (Rank == ad.rank); } template bool is_type() const { return (internal::cfi_type::type == ad.type && sizeof(Type) == ad.elem_len); } // Return the length or stride in memory of a particular dimension CFI_index_t dimension(int idim) const { return ad.dim[idim].extent; } CFI_index_t offset(int idim) const { return ad.dim[idim].sm/ad.elem_len; } // Throw an exception if the rank or type differ from the template // parameters Rank and Type template void verify() const { if (!is_rank()) { throw fortran_interoperability_error( "Rank of Fortran array does not match expected rank"); } else if (!is_type()) { throw fortran_interoperability_error( "Type of Fortran array does not match expected type"); } } // Return a pointer to the underlying data casting to the // specified Type template Type* data() { return static_cast(ad.base_addr); } // Allow this object to be passed to a function expecting a // pointer operator CFI_cdesc_t*() { return reinterpret_cast(&ad); } operator FortranArray*() { return this; } }; // Associate Adept array "a" with Fortran array "fa" so that // subsequent changes to the elements of "a" will be seen within // Fortran when the C++ routine returns. 
template void associate(Array& a, FortranArray* fa, bool preserve_dim_order = false) { fa->verify(); // Verify rank and type ExpressionSize dims, offs; if (!preserve_dim_order) { for (int irank = 0; irank < Rank; ++irank) { dims[Rank-irank-1] = fa->dimension(irank); offs[Rank-irank-1] = fa->offset(irank); } } else { for (int irank = 0; irank < Rank; ++irank) { dims[irank] = fa->dimension(irank); offs[irank] = fa->offset(irank); } } a.clear(); a = Array(static_cast(fa->data()), 0, dims, offs); } // Associate Adept array "a" with a general Fortran array descriptor // "cd", noting that we only verify that the rank and type match // when the "associate" function above is called. template void associate(Array& a, CFI_cdesc_t* cd, bool preserve_dim_order = false) { FortranArray* fa = reinterpret_cast(cd); associate(a, fa, preserve_dim_order); } // Enable link of an Adept array to a Fortran array using the >>= // operator template void operator>>=(adept::Array& a, FortranArray* fa) { associate(a,fa); } template void operator>>=(adept::Array& a, CFI_cdesc_t* cd) { associate(a,cd); } } // End namespace adept #endif ================================================ FILE: include/adept_optimize.h ================================================ /* adept_optimize.h -- Header file for optimization algorithms of Adept library Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan This file is part of the Adept library. */ #ifndef AdeptOptimize_H #define AdeptOptimize_H 1 #include #include #include #endif ================================================ FILE: include/create_adept_source_header ================================================ #!/bin/sh # This script creates a header file "adept_source.h" containing the # ../adept/*.h ../adept/*.cpp source files; why this is useful is explained below. 
ADEPT_SOURCE_HEADER=adept_source.h rm -f $ADEPT_SOURCE_HEADER echo "Creating $ADEPT_SOURCE_HEADER" echo "/* $ADEPT_SOURCE_HEADER - Source code for the Adept library Copyright (C) 2012-2015 The University of Reading Copyright (C) 2015- European Centre for Medium-Range Weather Forecasts Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. This file was created automatically by script $0 on "$(date)" It contains a concatenation of the source files from the Adept library. The idea is that a program may #include this file in one of its source files (typically the one containing the main function), and then the Adept library will be built into the executable without the need to link to an external library. All other source files should just #include <adept.h> or <adept_arrays.h>. The ability to use Adept in this way makes it easier to distribute an Adept package that is usable on non-Unix platforms that are unable to use the autoconf configure script to build external libraries. If HAVE_BLAS is defined below then matrix multiplication will be enabled; the BLAS library should be provided at the link stage although no header file is required. If HAVE_LAPACK is defined below then linear algebra routines will be enabled (matrix inverse and solving linear systems of equations); again, the LAPACK library should be provided at the link stage although no header file is required.
*/ /* Feel free to delete this warning: */ #ifdef _MSC_FULL_VER #pragma message(\"warning: the adept_source.h header file has not been edited so BLAS matrix multiplication and LAPACK linear-algebra support have been disabled\") #else #warning \"The adept_source.h header file has not been edited so BLAS matrix multiplication and LAPACK linear-algebra support have been disabled\" #endif /* Uncomment this if you are linking to the BLAS library (header file not required) to enable matrix multiplication */ //#define HAVE_BLAS 1 /* Uncomment this if you are linking to the LAPACK library (header file not required) */ //#define HAVE_LAPACK 1 /* Uncomment this if you have the cblas.h header from OpenBLAS */ //#define HAVE_OPENBLAS_CBLAS_HEADER /* The individual source files now follow. */ #ifndef AdeptSource_H #define AdeptSource_H 1 " > $ADEPT_SOURCE_HEADER for FILE in ../config_platform_independent.h ../adept/*.h ../adept/*.cpp do echo " Adding $FILE" echo " // ================================================================= // Contents of $(basename $FILE) // ================================================================= " >> $ADEPT_SOURCE_HEADER cat $FILE >> $ADEPT_SOURCE_HEADER done echo " #endif " >> $ADEPT_SOURCE_HEADER echo "Done" ================================================ FILE: m4/adept.m4 ================================================ # --------------------------------------------------------------------------- # FILE : adept.m4 # COPYRIGHT : 2018- ECMWF # AUTHOR : Alessio Bozzo # LICENSE : Apache License Version 2.0 # ---------------------------------------------------------------------------- # # This software is licensed under the terms of the Apache Licence # Version 2.0 which can be obtained at # http://www.apache.org/licenses/LICENSE-2.0. In applying this # licence, ECMWF does not waive the privileges and immunities granted # to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
# # ---------------------------------------------------------------------------- # # This file contains a macro processor (m4 file) to enable autotools # to locate the Adept C++ library (version 2.0 or greater). The file # should be placed in the m4 directory of your package. If you have # aclocal.m4 in your top-level directory then it will be found # automatically; otherwise you will need the following in your # configure.ac file: # # m4_include([m4/adept.m4]) # # Usage is then as follows in the configure.ac file # # AX_CHECK_ADEPT([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) # # for example: # # AX_CHECK_ADEPT([have_adept=yes], [have_adept=no]) # # This creates variables ADEPT_LDFLAGS and ADEPT_CPPFLAGS, and adds # them to LDFLAGS and CPPFLAGS. # # The macro looks for the Adept library in system directories, but the # user can specify another location by passing an argument to the # configure script as follows: # # ./configure --with-adept=/home/me/apps/adept-2.1 # # ---------------------------------------------------------------------------- dnl defines a custom macro AC_DEFUN([AX_CHECK_ADEPT], [ dnl provides a framework to handle the --with-{arg} values passed to configure on the command line AC_ARG_WITH([adept], [AS_HELP_STRING([--with-adept=DIR], [use Adept Library from directory DIR])], adept_prefix="$with_adept" [] ) AS_IF([test x$adept_prefix != x], [AS_IF([test -d "$adept_prefix/lib"], [ADEPT_LDFLAGS="-L$adept_prefix/lib -Wl,-rpath,$adept_prefix/lib -ladept" ADEPT_CPPFLAGS="-I$adept_prefix/include"], [test -d "$adept_prefix/lib64"], [ADEPT_LDFLAGS="-L$adept_prefix/lib64 -Wl,-rpath,$adept_prefix/lib64 -ladept" ADEPT_CPPFLAGS="-I$adept_prefix/include"], [AC_MSG_ERROR([ ----------------------------------------------------------------------------- --with-adept=$adept_prefix is not a valid directory -----------------------------------------------------------------------------])])], [AC_MSG_WARN([ 
----------------------------------------------------------------------------- Missing option `--with-adept=DIR`. Looking for Adept Library into Linux default library search paths -----------------------------------------------------------------------------])] ) LDFLAGS="$ADEPT_LDFLAGS $LDFLAGS" CPPFLAGS="$ADEPT_CPPFLAGS $CPPFLAGS" ax_have_adept=yes dnl checks for ADEPT AC_MSG_CHECKING([for Adept >= 2.0.4: including adept_arrays.h and linking via -ladept]) AC_LINK_IFELSE([AC_LANG_PROGRAM([#include #include #if ADEPT_VERSION < 20004 #error "Adept version >= 2.0.4 required" #endif],[std::string test = adept::compiler_version()])],AC_MSG_RESULT([yes]),AC_MSG_RESULT([no]) AC_MSG_ERROR([Unable to find Adept library version >= 2.0.4])) AS_IF([test "x$ax_have_adept" = xyes], dnl outputing Adept Library [AC_SUBST([ADEPT_LDFLAGS]) AC_SUBST([ADEPT_CPPFLAGS]) $1], [$2]) ] ) dnl vim:set softtabstop=4 shiftwidth=4 expandtab: ================================================ FILE: m4/ax_blas.m4 ================================================ # =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_blas.html # =========================================================================== # # SYNOPSIS # # AX_BLAS([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) # # DESCRIPTION # # This macro looks for a library that implements the BLAS linear-algebra # interface (see http://www.netlib.org/blas/). On success, it sets the # BLAS_LIBS output variable to hold the requisite library linkages. # # To link with BLAS, you should link with: # # $BLAS_LIBS $LIBS $FLIBS # # in that order. FLIBS is the output variable of the # AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is # sometimes necessary in order to link with F77 libraries. Users will also # need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same # reason. # # Many libraries are searched for, from ATLAS to CXML to ESSL. 
The user # may also use --with-blas=<lib> in order to use some specific BLAS # library <lib>. In order to link successfully, however, be aware that you # will probably need to use the same Fortran compiler (which can be set # via the F77 env. var.) as was used to compile the BLAS library. # # ACTION-IF-FOUND is a list of shell commands to run if a BLAS library is # found, and ACTION-IF-NOT-FOUND is a list of commands to run if it is # not found. If ACTION-IF-FOUND is not specified, the default action will # define HAVE_BLAS. # # LICENSE # # Copyright (c) 2008 Steven G. Johnson # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see <http://www.gnu.org/licenses/>. # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro. # # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Archive. When you make and distribute a # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well.
#serial 14 AU_ALIAS([ACX_BLAS], [AX_BLAS]) AC_DEFUN([AX_BLAS], [ AC_PREREQ(2.50) AC_REQUIRE([AC_F77_LIBRARY_LDFLAGS]) AC_REQUIRE([AC_CANONICAL_HOST]) ax_blas_ok=no AC_ARG_WITH(blas, [AS_HELP_STRING([--with-blas=], [use BLAS library ])]) case $with_blas in yes | "") ;; no) ax_blas_ok=disable ;; -* | */* | *.a | *.so | *.so.* | *.o) BLAS_LIBS="$with_blas" ;; *) BLAS_LIBS="-l$with_blas" ;; esac # Get fortran linker names of BLAS functions to check for. AC_F77_FUNC(sgemm) AC_F77_FUNC(dgemm) ax_blas_save_LIBS="$LIBS" LIBS="$LIBS $FLIBS" # First, check BLAS_LIBS environment variable if test $ax_blas_ok = no; then if test "x$BLAS_LIBS" != x; then save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS" AC_MSG_CHECKING([for $sgemm in $BLAS_LIBS]) AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes], [BLAS_LIBS=""]) AC_MSG_RESULT($ax_blas_ok) LIBS="$save_LIBS" fi fi # BLAS linked to by default? (happens on some supercomputers) if test $ax_blas_ok = no; then save_LIBS="$LIBS"; LIBS="$LIBS" AC_MSG_CHECKING([if $sgemm is being linked in already]) AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes]) AC_MSG_RESULT($ax_blas_ok) LIBS="$save_LIBS" fi # BLAS in OpenBLAS library? (http://xianyi.github.com/OpenBLAS/) if test $ax_blas_ok = no; then AC_CHECK_LIB(openblas, $sgemm, [ax_blas_ok=yes BLAS_LIBS="-lopenblas"]) fi # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) if test $ax_blas_ok = no; then AC_CHECK_LIB(atlas, ATL_xerbla, [AC_CHECK_LIB(f77blas, $sgemm, [AC_CHECK_LIB(cblas, cblas_dgemm, [ax_blas_ok=yes BLAS_LIBS="-lcblas -lf77blas -latlas"], [], [-lf77blas -latlas])], [], [-latlas])]) fi # BLAS in PhiPACK libraries? (requires generic BLAS lib, too) if test $ax_blas_ok = no; then AC_CHECK_LIB(blas, $sgemm, [AC_CHECK_LIB(dgemm, $dgemm, [AC_CHECK_LIB(sgemm, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lsgemm -ldgemm -lblas"], [], [-lblas])], [], [-lblas])]) fi # BLAS in Intel MKL library? 
if test $ax_blas_ok = no; then # MKL for gfortran if test x"$ac_cv_fc_compiler_gnu" = xyes; then # 64 bit if test $host_cpu = x86_64; then AC_CHECK_LIB(mkl_gf_lp64, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread"],, [-lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread]) # 32 bit elif test $host_cpu = i686; then AC_CHECK_LIB(mkl_gf, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl_gf -lmkl_sequential -lmkl_core -lpthread"],, [-lmkl_gf -lmkl_sequential -lmkl_core -lpthread]) fi # MKL for other compilers (Intel, PGI, ...?) else # 64-bit if test $host_cpu = x86_64; then AC_CHECK_LIB(mkl_intel_lp64, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread"],, [-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lpthread]) # 32-bit elif test $host_cpu = i686; then AC_CHECK_LIB(mkl_intel, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl_intel -lmkl_sequential -lmkl_core -lpthread"],, [-lmkl_intel -lmkl_sequential -lmkl_core -lpthread]) fi fi fi # Old versions of MKL if test $ax_blas_ok = no; then AC_CHECK_LIB(mkl, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lmkl -lguide -lpthread"],,[-lguide -lpthread]) fi # BLAS in Apple vecLib library? if test $ax_blas_ok = no; then save_LIBS="$LIBS"; LIBS="-framework vecLib $LIBS" AC_MSG_CHECKING([for $sgemm in -framework vecLib]) AC_TRY_LINK_FUNC($sgemm, [ax_blas_ok=yes;BLAS_LIBS="-framework vecLib"]) AC_MSG_RESULT($ax_blas_ok) LIBS="$save_LIBS" fi # BLAS in Alpha CXML library? if test $ax_blas_ok = no; then AC_CHECK_LIB(cxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-lcxml"]) fi # BLAS in Alpha DXML library? (now called CXML, see above) if test $ax_blas_ok = no; then AC_CHECK_LIB(dxml, $sgemm, [ax_blas_ok=yes;BLAS_LIBS="-ldxml"]) fi # BLAS in Sun Performance library? 
if test $ax_blas_ok = no; then if test "x$GCC" != xyes; then # only works with Sun CC AC_CHECK_LIB(sunmath, acosp, [AC_CHECK_LIB(sunperf, $sgemm, [BLAS_LIBS="-xlic_lib=sunperf -lsunmath" ax_blas_ok=yes],[],[-lsunmath])]) fi fi # BLAS in SCSL library? (SGI/Cray Scientific Library) if test $ax_blas_ok = no; then AC_CHECK_LIB(scs, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lscs"]) fi # BLAS in SGIMATH library? if test $ax_blas_ok = no; then AC_CHECK_LIB(complib.sgimath, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lcomplib.sgimath"]) fi # BLAS in IBM ESSL library? (requires generic BLAS lib, too) if test $ax_blas_ok = no; then AC_CHECK_LIB(blas, $sgemm, [AC_CHECK_LIB(essl, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lessl -lblas"], [], [-lblas $FLIBS])]) fi # Generic BLAS library? if test $ax_blas_ok = no; then AC_CHECK_LIB(blas, $sgemm, [ax_blas_ok=yes; BLAS_LIBS="-lblas"]) fi AC_SUBST(BLAS_LIBS) LIBS="$ax_blas_save_LIBS" # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: if test x"$ax_blas_ok" = xyes; then ifelse([$1],,AC_DEFINE(HAVE_BLAS,1,[Define if you have a BLAS library.]),[$1]) : else ax_blas_ok=no $2 fi ])dnl AX_BLAS ================================================ FILE: m4/ax_lapack.m4 ================================================ # =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_lapack.html # =========================================================================== # # SYNOPSIS # # AX_LAPACK([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) # # DESCRIPTION # # This macro looks for a library that implements the LAPACK linear-algebra # interface (see http://www.netlib.org/lapack/). On success, it sets the # LAPACK_LIBS output variable to hold the requisite library linkages. # # To link with LAPACK, you should link with: # # $LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS # # in that order. BLAS_LIBS is the output variable of the AX_BLAS macro, # called automatically. 
FLIBS is the output variable of the # AC_F77_LIBRARY_LDFLAGS macro (called if necessary by AX_BLAS), and is # sometimes necessary in order to link with F77 libraries. Users will also # need to use AC_F77_DUMMY_MAIN (see the autoconf manual), for the same # reason. # # The user may also use --with-lapack=<lib> in order to use some specific # LAPACK library <lib>. In order to link successfully, however, be aware # that you will probably need to use the same Fortran compiler (which can # be set via the F77 env. var.) as was used to compile the LAPACK and BLAS # libraries. # # ACTION-IF-FOUND is a list of shell commands to run if a LAPACK library # is found, and ACTION-IF-NOT-FOUND is a list of commands to run if it # is not found. If ACTION-IF-FOUND is not specified, the default action # will define HAVE_LAPACK. # # LICENSE # # Copyright (c) 2009 Steven G. Johnson # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General # Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program. If not, see <http://www.gnu.org/licenses/>. # # As a special exception, the respective Autoconf Macro's copyright owner # gives unlimited permission to copy, distribute and modify the configure # scripts that are the output of Autoconf when processing the Macro. You # need not follow the terms of the GNU General Public License when using # or distributing such scripts, even though portions of the text of the # Macro appear in them. The GNU General Public License (GPL) does govern # all other use of the material that constitutes the Autoconf Macro.
# # This special exception to the GPL applies to versions of the Autoconf # Macro released by the Autoconf Archive. When you make and distribute a # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. #serial 7 AU_ALIAS([ACX_LAPACK], [AX_LAPACK]) AC_DEFUN([AX_LAPACK], [ AC_REQUIRE([AX_BLAS]) ax_lapack_ok=no AC_ARG_WITH(lapack, [AS_HELP_STRING([--with-lapack=], [use LAPACK library ])]) case $with_lapack in yes | "") ;; no) ax_lapack_ok=disable ;; -* | */* | *.a | *.so | *.so.* | *.o) LAPACK_LIBS="$with_lapack" ;; *) LAPACK_LIBS="-l$with_lapack" ;; esac # Get fortran linker name of LAPACK function to check for. AC_F77_FUNC(cheev) # We cannot use LAPACK if BLAS is not found if test "x$ax_blas_ok" != xyes; then ax_lapack_ok=noblas LAPACK_LIBS="" fi # First, check LAPACK_LIBS environment variable if test "x$LAPACK_LIBS" != x; then save_LIBS="$LIBS"; LIBS="$LAPACK_LIBS $BLAS_LIBS $LIBS $FLIBS" AC_MSG_CHECKING([for $cheev in $LAPACK_LIBS]) AC_TRY_LINK_FUNC($cheev, [ax_lapack_ok=yes], [LAPACK_LIBS=""]) AC_MSG_RESULT($ax_lapack_ok) LIBS="$save_LIBS" if test $ax_lapack_ok = no; then LAPACK_LIBS="" fi fi # LAPACK linked to by default? (is sometimes included in BLAS lib) if test $ax_lapack_ok = no; then save_LIBS="$LIBS"; LIBS="$LIBS $BLAS_LIBS $FLIBS" AC_CHECK_FUNC($cheev, [ax_lapack_ok=yes]) LIBS="$save_LIBS" fi # Generic LAPACK library? 
for lapack in lapack lapack_rs6k; do if test $ax_lapack_ok = no; then save_LIBS="$LIBS"; LIBS="$BLAS_LIBS $LIBS" AC_CHECK_LIB($lapack, $cheev, [ax_lapack_ok=yes; LAPACK_LIBS="-l$lapack"], [], [$FLIBS]) LIBS="$save_LIBS" fi done AC_SUBST(LAPACK_LIBS) # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: if test x"$ax_lapack_ok" = xyes; then ifelse([$1],,AC_DEFINE(HAVE_LAPACK,1,[Define if you have LAPACK library.]),[$1]) : else ax_lapack_ok=no $2 fi ])dnl AX_LAPACK ================================================ FILE: m4/ltsugar.m4 ================================================ # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- # # Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 6 ltsugar.m4 # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) # lt_join(SEP, ARG1, [ARG2...]) # ----------------------------- # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their # associated separator. # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier # versions in m4sugar had bugs. m4_define([lt_join], [m4_if([$#], [1], [], [$#], [2], [[$2]], [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) m4_define([_lt_join], [m4_if([$#$2], [2], [], [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) # lt_car(LIST) # lt_cdr(LIST) # ------------ # Manipulate m4 lists. # These macros are necessary as long as will still need to support # Autoconf-2.59 which quotes differently. 
m4_define([lt_car], [[$1]]) m4_define([lt_cdr], [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], [$#], 1, [], [m4_dquote(m4_shift($@))])]) m4_define([lt_unquote], $1) # lt_append(MACRO-NAME, STRING, [SEPARATOR]) # ------------------------------------------ # Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'. # Note that neither SEPARATOR nor STRING are expanded; they are appended # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). # No SEPARATOR is output if MACRO-NAME was previously undefined (different # than defined and empty). # # This macro is needed until we can rely on Autoconf 2.62, since earlier # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. m4_define([lt_append], [m4_define([$1], m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) # ---------------------------------------------------------- # Produce a SEP delimited list of all paired combinations of elements of # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list # has the form PREFIXmINFIXSUFFIXn. # Needed until we can rely on m4_combine added in Autoconf 2.62. m4_define([lt_combine], [m4_if(m4_eval([$# > 3]), [1], [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl [[m4_foreach([_Lt_prefix], [$2], [m4_foreach([_Lt_suffix], ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) # ----------------------------------------------------------------------- # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
m4_define([lt_if_append_uniq], [m4_ifdef([$1], [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], [lt_append([$1], [$2], [$3])$4], [$5])], [lt_append([$1], [$2], [$3])$4])]) # lt_dict_add(DICT, KEY, VALUE) # ----------------------------- m4_define([lt_dict_add], [m4_define([$1($2)], [$3])]) # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) # -------------------------------------------- m4_define([lt_dict_add_subkey], [m4_define([$1($2:$3)], [$4])]) # lt_dict_fetch(DICT, KEY, [SUBKEY]) # ---------------------------------- m4_define([lt_dict_fetch], [m4_ifval([$3], m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) # ----------------------------------------------------------------- m4_define([lt_if_dict_fetch], [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], [$5], [$6])]) # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) # -------------------------------------------------------------- m4_define([lt_dict_filter], [m4_if([$5], [], [], [lt_join(m4_quote(m4_default([$4], [[, ]])), lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl ]) ================================================ FILE: m4/lt~obsolete.m4 ================================================ # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # # Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. # Written by Scott James Remnant, 2004. # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 5 lt~obsolete.m4 # These exist entirely to fool aclocal when bootstrapping libtool. 
# # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) # which have later been changed to m4_define as they aren't part of the # exported API, or moved to Autoconf or Automake where they belong. # # The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us # using a macro with the same name in our local m4/libtool.m4 it'll # pull the old libtool.m4 in (it doesn't see our shiny new m4_define # and doesn't know about Autoconf macros at all.) # # So we provide this file, which has a silly filename so it's always # included after everything else. This provides aclocal with the # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything # because those macros already exist, or will be overwritten later. # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. # # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. # Yes, that means every name once taken will need to remain here until # we give up compatibility with versions before 1.7, at which point # we need to keep only those names which we still refer to. # This is to help aclocal find these macros, as it can't see m4_define. 
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], 
[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], 
[AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) ================================================ FILE: makefile_include.in ================================================ # Template for configure to create makefile_include, which is included # by test/Makefile and benchmark/Makefile AR = @AR@ CC = @CC@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXFLAGS = @CXXFLAGS@ @OPENMP_CXXFLAGS@ DEFS = @DEFS@ LD = @LD@ LDFLAGS = @LDFLAGS@ @OPENMP_CXXFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ SHELL = @SHELL@ LIBTOOL = @LIBTOOL@ USE_GSL = @USE_GSL@ ================================================ FILE: test/Makefile ================================================ # Makefile for example programs that demonstrate different features of # the Adept library # # Note that this Makefile is hand-coded rather than being generated by # automake # # The -DADEPT_RECORDING_PAUSABLE option enables the pause_recording # and continue_recording functionality and is used by test_adept, # although it will run correctly (but slightly more slowly) without # this flag # The configure script writes the following file, which contains # variables controlling the compilation include ../makefile_include # Uncomment the following to check what happens if thread safety # disabled # ADEPT_FLAGS = -DADEPT_STACK_THREAD_UNSAFE # The objects to create OBJECTS = algorithm.o algorithm_noad.o test_checkpoint.o \ test_adept.o test_adept_with_and_without_ad.o \ test_radiances.o simulate_radiances.o test_thread_safe.o \ test_no_lib.o test_misc.o test_arrays.o test_arrays_active.o \ test_array_speed.o 
test_arrays_active_pausable.o \ test_fixed_arrays_active.o test_radiances_array.o \ test_fixed_arrays.o test_constructors.o test_derivatives.o \ test_array_derivatives.o test_thread_safe_arrays.o \ test_complex_arrays.o test_packet_operations.o \ test_fastexp.o test_reduce_active.o test_minimizer.o \ test_interp.o GSL_OBJECTS = test_gsl_interface.o state.o rosenbrock_banana_function.o GSL_LIBS = -lgsl COMPILE_FLAGS = $(CXXFLAGS) -I../include $(CPPFLAGS) $(ADEPT_FLAGS) # Because we aren't going to install the test programs, and we want # them to work even if Adept is not installed, it is easiest to use # libtool to create statically-linked executables top_builddir = .. CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) \ -static -no-install -L../adept/.libs $(LDFLAGS) -ladept -o $@ # Link without the Adept library CXXLINK_NOLIB = $(LIBTOOL) --tag=CXX --mode=link $(CXX) $(CXXFLAGS) \ $(LDFLAGS) -o $@ # Dependency on the presence of the Adept static library LIBADEPT = ../adept/.libs/libadept.a MYLIBS = $(LIBS) PROGRAMS = test_adept test_adept_with_and_without_ad test_radiances \ test_gsl_interface test_misc test_checkpoint test_thread_safe \ test_array_speed test_no_lib test_radiances_array test_constructors \ test_arrays test_arrays_active test_arrays_active_pausable \ test_fixed_arrays test_fixed_arrays_active test_derivatives \ test_array_derivatives test_thread_safe_arrays test_complex_arrays \ test_packet_operations test_fastexp test_reduce_active test_minimizer \ test_interp all: @echo "********************************************************" @echo "*** To compile test programs in test/ and benchmark/ ***" @echo "*** type \"make check\" ***" @echo "********************************************************" # Compile all four programs check: $(PROGRAMS) run-tests run-tests: ./run_tests.sh $(PROGRAMS) # Test program 1 test_adept: algorithm.o test_adept.o $(LIBADEPT) $(CXXLINK) algorithm.o test_adept.o $(MYLIBS) # Test program 2 
test_adept_with_and_without_ad: algorithm.o algorithm_noad.o test_adept_with_and_without_ad.o $(LIBADEPT) $(CXXLINK) algorithm.o algorithm_noad.o test_adept_with_and_without_ad.o $(MYLIBS) # Test program 3 test_radiances: simulate_radiances.o test_radiances.o $(LIBADEPT) $(CXXLINK) simulate_radiances.o test_radiances.o $(MYLIBS) ifeq "X$(USE_GSL)" "Xyes" # Test program 4 test_gsl_interface: $(GSL_OBJECTS) $(LIBADEPT) $(CXXLINK) $(GSL_OBJECTS) $(GSL_LIBS) $(MYLIBS) else test_gsl_interface: @echo "The executable test_gsl_interface will not be created because GSL library was not found" endif # Test program 5 test_misc: test_misc.o algorithm.o $(LIBADEPT) $(CXXLINK) test_misc.o algorithm.o $(MYLIBS) # Test program 6 test_checkpoint: test_checkpoint.o $(LIBADEPT) $(CXXLINK) test_checkpoint.o $(MYLIBS) # Test program 7 test_thread_safe: test_thread_safe.o $(LIBADEPT) $(CXXLINK) test_thread_safe.o $(MYLIBS) # Test program 8 (note that it is not linked against the Adept library) test_no_lib: test_no_lib.o algorithm.o $(CXXLINK_NOLIB) test_no_lib.o algorithm.o $(MYLIBS) # Test program 9a test_arrays: test_arrays.o $(LIBADEPT) $(CXXLINK) test_arrays.o $(MYLIBS) # Test program 9b test_arrays_active: test_arrays_active.o $(LIBADEPT) $(CXXLINK) test_arrays_active.o $(MYLIBS) # Test program 9c test_arrays_active_pausable: test_arrays_active_pausable.o $(LIBADEPT) $(CXXLINK) test_arrays_active_pausable.o $(MYLIBS) # Test program 9d test_complex_arrays: test_complex_arrays.o $(LIBADEPT) $(CXXLINK) test_complex_arrays.o $(MYLIBS) # Test program 10 test_array_speed: test_array_speed.o $(LIBADEPT) $(CXXLINK) test_array_speed.o $(MYLIBS) # Test program 11 test_radiances_array: simulate_radiances.o test_radiances_array.o $(LIBADEPT) $(CXXLINK) simulate_radiances.o test_radiances_array.o $(MYLIBS) # Test program 12a test_fixed_arrays: test_fixed_arrays.o $(LIBADEPT) $(CXXLINK) test_fixed_arrays.o $(MYLIBS) # Test program 12b test_fixed_arrays_active: test_fixed_arrays_active.o 
$(LIBADEPT) $(CXXLINK) test_fixed_arrays_active.o $(MYLIBS) # Test program 13 test_constructors: test_constructors.o $(LIBADEPT) $(CXXLINK) test_constructors.o $(MYLIBS) # Test program 14 test_derivatives: test_derivatives.o $(LIBADEPT) $(CXXLINK) test_derivatives.o $(MYLIBS) # Test program 15 test_array_derivatives: test_array_derivatives.o $(LIBADEPT) $(CXXLINK) test_array_derivatives.o $(MYLIBS) # Test program 16 test_thread_safe_arrays: test_thread_safe_arrays.o $(LIBADEPT) $(CXXLINK) test_thread_safe_arrays.o $(MYLIBS) # Test program 17 test_packet_operations: test_packet_operations.o $(LIBADEPT) $(CXXLINK) test_packet_operations.o $(MYLIBS) # Test program 18 test_fastexp: test_fastexp.o $(LIBADEPT) $(CXXLINK) test_fastexp.o $(MYLIBS) # Test program 19 test_reduce_active: test_reduce_active.o $(LIBADEPT) $(CXXLINK) test_reduce_active.o $(MYLIBS) # Test program 20 test_minimizer: test_minimizer.o $(LIBADEPT) $(CXXLINK) test_minimizer.o $(MYLIBS) # Test program 21 test_interp: test_interp.o $(LIBADEPT) $(CXXLINK) test_interp.o $(MYLIBS) # The no-automatic-differentiation version of the algorithm: uses the # -DADEPT_NO_AUTOMATIC_DIFFERENTIATION to produce a version of the # algorithm that takes double rather than adouble arguments algorithm_noad.o: algorithm.cpp *.h ../include/adept.h $(CXX) $(COMPILE_FLAGS) $(INCLUDES) -c algorithm.cpp -DADEPT_NO_AUTOMATIC_DIFFERENTIATION -o $@ # All other object files created by compiling the corresponding source # file without this flag %.o: %.cpp *.h ../include/*.h ../include/adept/*.h $(CXX) $(COMPILE_FLAGS) $(INCLUDES) -c $< test_arrays_active.o: test_arrays.cpp *.h ../include/*.h ../include/adept/*.h $(CXX) $(COMPILE_FLAGS) -DALL_ACTIVE $(INCLUDES) -c test_arrays.cpp -o test_arrays_active.o test_arrays_active_pausable.o: test_arrays.cpp *.h ../include/*.h ../include/adept/*h $(CXX) $(COMPILE_FLAGS) -DADEPT_RECORDING_PAUSABLE -DALL_ACTIVE $(INCLUDES) -c test_arrays.cpp -o test_arrays_active_pausable.o test_complex_arrays.o: 
test_arrays.cpp *.h ../include/*.h ../include/adept/*.h $(CXX) $(COMPILE_FLAGS) -DALL_COMPLEX $(INCLUDES) -c test_arrays.cpp -o test_complex_arrays.o test_fixed_arrays_active.o: test_fixed_arrays.cpp *.h ../include/*.h ../include/adept/*.h $(CXX) $(COMPILE_FLAGS) -DALL_ACTIVE $(INCLUDES) -c test_fixed_arrays.cpp -o test_fixed_arrays_active.o # Remove all object files and executables clean: rm -f $(OBJECTS) $(GSL_OBJECTS) $(PROGRAMS) test_stderr.txt test_results.txt mostlyclean: clean # Null targets to satisfy autotools EMPTY_AUTOMAKE_TARGETS = distdir install install-data install-exec uninstall \ install-dvi install-html install-info install-ps install-pdf \ installdirs installcheck distclean maintainer-clean \ dvi pdf ps info html tags ctags .PHONY: $(EMPTY_AUTOMAKE_TARGETS) $(EMPTY_AUTOMAKE_TARGETS): ================================================ FILE: test/README ================================================ This directory contains examples to demonstrate various features of Adept. Type "make check" from the directory above to compile them. Note that unlike in the rest of this package, the Makefile in this directory was not generated by automake; it is well commented and so may assist in understanding how to build software that uses Adept. TEST 1: BASIC FEATURES Executable: test_adept Source files: test_adept.cpp, algorithm.cpp, algorithm.h Demonstrates: basic use of Adept, reverse-mode automatic differentiation, computing the Jacobian matrix, printing diagnostic information, verifying results by comparing to numerical calculations, pausing and continuing recordings Synopsis: This program demonstrates how to differentiate a simple function (in algorithm.cpp), comparing the results from automatic differentiation with numerical differentiation. The function used is the contrived example from the Adept paper. 
TEST 2: COMPILING SOURCE FILES TWICE, WITH AND WITHOUT AUTOMATIC DIFFERENTIATION Executable: test_adept_with_and_without_ad Source files: test_adept_with_and_without_ad.cpp, algorithm.cpp, algorithm.h, algorithm_with_and_without_ad.h Demonstrates: most of the same features as TEST_ADEPT, plus compiling a source file twice Synopsis: This program is the same as in Test 1, except that algorithm.cpp is compiled twice, once with automatic differentiation (producing the object file algorithm.o) and once without (producing the object file algorithm_noad.o). This is achieved in the Makefile using the -DADEPT_NO_AUTOMATIC_DIFFERENTIATION flag. This provides two overloaded versions of the "algorithm" function, one that takes active "adouble" arguments, and the other that takes inactive "double" arguments. The two versions are declared in the algorithm_with_and_without_ad.h header file. TEST 3: RADIANCE SIMULATION Executable: test_radiances Source files: test_radiances.cpp, simulate_radiances.cpp, simulate_radiances.h Demonstrates: activation and deactivation of an Adept stack, using more than one Adept stack in the same program (but not at the same time), how to interface Adept with software that computes its own Jacobian Synopsis: The "main" function is in test_radiances.cpp, and demonstrates how to interface Adept to an algorithm that does not have an Adept interface, but which provides its own Jacobian. The algorithm in this case is in simulate_radiances.cpp; while it does not have an Adept interface, it does use Adept internally to compute the Jacobian that it returns. It therefore needs to temporarily deactivate the calling function's Adept stack (where derivative information is stored) while using its own. This example is from the Adept documentation. 
TEST 4: GSL MINIMIZATION INTERFACE Executable: test_gsl_interface Command-line arguments: optionally, the executable name can be followed by an integer (which should be 2 or greater) expressing the number of dimensions of the minimization problem. The default is 2. Source files: test_gsl_interface.cpp, rosenbrock_banana_function.cpp, state.cpp, state.h Pre-requisites: the GNU Scientific Library should be installed; on an RPM-based system you want the "gsl" and "gsl-devel" packages. If this is not available at the time the configure script is run, this executable will not be built. Demonstrates: interface with the multi-dimensional minimization capability of the GNU Scientific Library, use of Adept to minimize a real function, an object-oriented way to store Adept data for a minimization problem Synopsis: The "main" function is in test_gsl_interface.cpp and is fairly self-explanatory. The state.cpp and state.h files show how Adept data can be stored and accessed in an object-oriented way. The function to be minimized is the N-dimensional Rosenbrock banana function, given in rosenbrock_banana_function.cpp. TEST 5: TRIVIAL EXAMPLE IN ADEPT PAPER Executable: test_misc Source files: test_misc.cpp, algorithm.cpp, algorithm.h Demonstrates: basic use of Adept, reverse-mode automatic differentiation Synopsis: This program is simply the trivial example in the Adept paper, using the same algorithm as in Test 1. TEST 6: CHECKPOINTING Executable: test_checkpoint Source files: test_checkpoint.cpp Demonstrates: checkpointing Synopsis: Large algorithms, particularly those that involve time-dependent simulations, can require a lot of memory when used with an automatic-differentiation tool. Even if enough memory is available, the speed may be sub-optimal.
This program demonstrates the checkpointing technique, where a simulation using the "Toon" algorithm in the Adept paper is first run with 10,000 timesteps, and then in 100 blocks of 100 timesteps (the checkpointed simulation), with the output stored after each block so that the reverse pass of the automatic differentiation needs 100 times less memory. The resulting gradients are output to verify that the two versions produce the same results, and the timings of the two are presented as well. TEST 7: THREAD SAFETY Executable: test_thread_safe Source files: test_thread_safe.cpp Demonstrates: use of Adept in multi-threaded applications, thread safety, comparison of Jacobian matrices computed using the forward and reverse methods Synopsis: This program computes the 128-by-128 Jacobian matrix of an algorithm 16 times with different inputs. The Jacobian matrix is actually computed twice, once with 128 forward passes through the derivative statements and once with 128 reverse passes through the derivative statements, and a check is performed to see that the root-mean-squared difference is within some tolerance. The default behaviour (and if the "-parallel" command-line argument is provided) is to use OpenMP to run the 16 computations in parallel. In this instance the 128 passes required to compute the Jacobian matrices will be computed using just a single thread. If the "-serial" command-line argument is provided then the 16 computations are carried out in series. In this instance, the Adept library is able to run the Jacobian-matrix calculation in parallel (this behaviour is automatic if the program is compiled with the -fopenmp option). If the program is compiled with the ADEPT_STACK_THREAD_UNSAFE preprocessor variable defined, or on platforms that don't support thread-local variables (e.g. some Mac platforms), then the program should abort in the "-parallel" case ONLY.
TEST 8: COMPILING WITHOUT EXTERNAL ADEPT LIBRARY Executable: test_no_lib Source files: test_no_lib.cpp, algorithm.cpp, algorithm.h Demonstrates: use of adept_source.h to create an executable without the need for the external Adept library Synopsis: This is basically the same as test_misc.cpp, but one of the source files includes adept_source.h (rather than adept.h), which contains the source code for the Adept library. This means that no linking to an external Adept library (via -ladept) is required. This capability makes it easier to distribute a package that can be used on the widest range of operating systems, particularly those like Microsoft Windows that cannot natively run the configure shell script. TEST 9a,b,c,d: ARRAY FUNCTIONALITY Executables: (a) test_arrays, (b) test_arrays_active, (c) test_arrays_active_pausable, (d) test_complex_arrays Source files: test_arrays.cpp Demonstrates: array functionality for (a) passive arrays, (b) active arrays, (c) active arrays but with stack recording "paused", (d) complex arrays. TEST 10: ARRAY SPEED Executable: test_array_speed Source files: test_array_speed.cpp Demonstrates: speed of arrays versus for loops TEST 11: RADIANCE SIMULATION WITH ARRAYS Executable: test_radiances_array Source files: test_radiances_array.cpp, simulate_radiances.cpp, simulate_radiances.h Demonstrates: use of arrays with add/append_derivative_dependence TEST 12a,b: FIXED-ARRAY FUNCTIONALITY Executables: (a) test_fixed_arrays, (b) test_fixed_arrays_active Source file: test_fixed_arrays.cpp Demonstrates: functionality of fixed arrays, i.e. those whose dimensions are set at compile time: (a) passive version, and (b) active version. TEST 13: ARRAY CONSTRUCTORS Executable: test_constructors Source file: test_constructors.cpp Demonstrates: different ways of constructing, assigning and linking arrays, and passing them to and from functions.
TEST 14: DERIVATIVES Executable: test_derivatives Source file: test_derivatives.cpp Demonstrates: validity of the automatic differentiation of all mathematical functions supported by Adept, via finite differencing. TEST 15: ARRAY DERIVATIVES Executable: test_array_derivatives Source file: test_array_derivatives.cpp Demonstrates: validity of the automatic differentiation of selected array operations, on both Array types and FixedArray types. TEST 16: THREAD-SAFE ARRAYS Executable: test_thread_safe_arrays Source file: test_thread_safe_arrays.cpp Demonstrates: two ways to make accessing arrays thread safe: use the soft_link() member function of Array and SpecialMatrix, OR compile with ADEPT_STORAGE_THREAD_SAFE (C++11 only). TEST 17: PACKET OPERATIONS Executable: test_packet_operations Source file: test_packet_operations.cpp Demonstrates: Use of Intel or ARM intrinsics is mathematically consistent regardless of whether code is compiled with SSE2, NEON, AVX2 or AVX512. You will need to recompile with (e.g. for g++) -msse2, -mavx2 or -mavx512f (or simply -march=native to use the best instruction set available) and check that the output is the same each time. TEST 18: FAST EXPONENTIAL OPERATIONS Executable: test_fastexp Source file: test_fastexp.cpp Demonstrates: Correctness of Adept's fast exponential function. TEST 19: ACTIVE REDUCE OPERATIONS Executable: test_reduce_active Source file: test_reduce_active.cpp Demonstrates: differentiation of reduction operations (sum, product, maxval etc). TEST 20: MINIMIZER Executable: test_minimizer Source file: test_minimizer.cpp Demonstrates: Adept's various minimization algorithms on the N-dimensional Rosenbrock banana function, where the (optional) arguments are: 1. number of dimensions, default 2 2.
minimization algorithm string, default "Levenberg-Marquardt" (also available: Levenberg, L-BFGS, Conjugate-Gradient, Conjugate-Gradient-FR; additionally, the "Newton-Levenberg-Marquardt" and "Newton-Levenberg" options will use the exact Hessian, rather than an approximation) 3. maximum number of iterations, default 100 4. gradient-norm to indicate convergence, default 0.1 The cost function value and gradient norm are reported to standard output each iteration. To standard error is written a table of numbers, one line per call to the function being minimized. The first number on each line is the number of the sub-iteration, usually the number of the call to the line-search algorithm, starting at 0. Then follows the N values of the state vector, followed by the value of the cost function. This can be used to plot how each minimizer progresses to the solution. TEST 21: INTERPOLATION Executable: test_interp Source file: test_interp.cpp Demonstrates: Adept's interpolation functions interp, interp2d and interp3d. ================================================ FILE: test/algorithm.cpp ================================================ /* algorithm.cpp - A simple demonstration algorithm used in Tests 1 & 2 Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #include <cmath> #include "algorithm.h" using adept::adouble; // A simple demonstration algorithm used in the Adept paper. Note that // this algorithm can be compiled with // -DADEPT_NO_AUTOMATIC_DIFFERENTIATION to create a version that takes // double arguments and returns a double result.
adouble algorithm(const adouble x[2]) { adouble y = 4.0; adouble s = 2.0*x[0] + 3.0*x[1]*x[1]; double b=3.0; y = s + b; y *= sin(s); return y; } ================================================ FILE: test/algorithm.h ================================================ /* algorithm.h - Header file for the simple example algorithm function Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #ifndef ALGORITHM_H #define ALGORITHM_H 1 // This header file defining the interface of the simple demonstration // function "algorithm". This header file is included by both // algorithm.cpp, which defines the body of the function, and // test_adept.cpp, which calls algorithm. #include "adept.h" // Declare the function adept::adouble algorithm(const adept::adouble x[2]); #endif ================================================ FILE: test/algorithm_with_and_without_ad.h ================================================ /* algorithm_with_and_without_ad.h Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ // This header file defining the interface of the simple demonstration // function "algorithm", and is included by // test_adept_with_and_without_ad.cpp. It demonstrates the use of a // single source file that is compiled twice to produce two overloaded // versions of a function. The "original" version takes // double-precision arguments and returns a double-precision answer, // while the automatic differentiation version takes adouble arguments // and returns an adouble answer. 
The two versions are compiled from // the same source file algorithm.cpp by compiling it twice with and // without the compiler option -DADEPT_NO_AUTOMATIC_DIFFERENTIATION. #ifndef ALGORITHM_WITH_AND_WITHOUT_AD_H #define ALGORITHM_WITH_AND_WITHOUT_AD_H 1 #include "adept.h" // Declare the original version of the function double algorithm(const double x[2]); #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION // Declare the automatic-differentiation version of the function adept::adouble algorithm(const adept::adouble x[2]); #endif #endif ================================================ FILE: test/rosenbrock_banana_function.cpp ================================================ /* rosenbrock_banana_function.cpp - N-dimensional Rosenbrock function Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ // This function is an N-dimensional extension of Rosenbrock's banana // function; it is actually the "2nd De Jong function" - see the // Wikipedia entry for Rosenbrock's function.
#include "state.h"

using adept::adouble;

// Returns the sum, over consecutive pairs (x[i], x[i+1]) for
// i = 0 .. nx()-2, of (1 - x[i])^2 + 100*(x[i+1] - x[i]^2)^2.
// NOTE(review): the return type of nx() is not visible here; if it is
// unsigned and zero then nx()-1 wraps around - confirm nx() >= 1.
adouble State::calc_function_value(const adouble* x) {
  adouble cost = 0.0;
  unsigned int i = 0;
  while (i < nx()-1) {
    // Deviation of the next element from the square of this one
    const adouble coupling = x[i+1]-x[i]*x[i];
    cost += (1.0-x[i])*(1.0-x[i]) + 100.0*coupling*coupling;
    ++i;
  }
  return cost;
}


================================================
FILE: test/run_tests.sh
================================================
#!/bin/sh
# Simple script to run all programs provided to it and report whether
# they succeed or fail

LOG=test_results.txt
STDERR=test_stderr.txt

rm -f $LOG
touch $LOG

echo
echo "Writing output of test programs to $LOG"
echo

FAILURES=0

for TEST in "$@"
do
    if [ -x "$TEST" ]
    then
        rm -f $STDERR
        echo >> $LOG
        echo "########################################################" >> $LOG
        echo "### $TEST" >> $LOG
        echo "########################################################" >> $LOG
        echo >> $LOG
        # The built-in version of "echo" on some versions of "sh" does
        # not treat the "-n" option correctly, so we use /bin/echo
        # here
        /bin/echo -n "$TEST... "
        ./$TEST >> $LOG 2> $STDERR
        if [ "$?" = 0 ]
        then
            echo "PASSED"
        else
            echo "*** FAILED ***"
            cat $STDERR
            FAILURES=`expr $FAILURES + 1`
        fi
    else
        echo "$TEST does not exist"
    fi
done

echo
if [ "$FAILURES" -gt "0" ]
then
    echo "$FAILURES programs failed in some way - see detailed output in $LOG"
else
    echo "All test programs ran successfully"
fi
echo
exit $FAILURES


================================================
FILE: test/simulate_radiances.cpp
================================================
/* simulate_radiances.cpp - provides a function taking inactive
   arguments that returns also Jacobian matrices

  Copyright (C) 2012-2014 The University of Reading

  Copying and distribution of this file, with or without modification,
  are permitted in any medium without royalty provided the copyright
  notice and this notice are preserved.  This file is offered as-is,
  without any warranty.
*/ #include "adept.h" #include "simulate_radiances.h" using adept::aReal; using adept::Real; // Simulate a single radiance (W sr-1 m-3) given the wavelength (m), // emissivity profile, surface temperature (K) and temperature profile // (K), where the profile data are located at n points with spacing // 1000 m. This function uses active arguments. It is accessible only // from within this file; the public interface is the // simulate_radiance function. static aReal simulate_radiance_private(int n, Real wavelength, const Real* emissivity, const aReal& surface_temperature, const aReal* temperature) { static const Real BOLTZMANN_CONSTANT = 1.380648813e-23; static const Real SPEED_OF_LIGHT = 299792458.0; int i; aReal bt = surface_temperature; // Brightness temperature in K // Loop up through the atmosphere working out the contribution from // each layer for (i = 0; i < n; i++) { bt = bt*(1.0-emissivity[i]) + emissivity[i]*temperature[i]; } // Convert from brightness temperature to radiance using // Rayleigh-Jeans approximation return 2.0*SPEED_OF_LIGHT*BOLTZMANN_CONSTANT*bt /(wavelength*wavelength*wavelength*wavelength); } // Simulate two radiances (W sr-1 m-3) given the surface temperature // (K) and temperature profile (K), where the profile data are located // at n points with spacing 1000 m. This function uses inactive // arguments. 
void simulate_radiances(int n, // Size of temperature array // Input variables: Real surface_temperature, const Real* temperature, // Output variables: Real radiance[2], // Output Jacobians: Real dradiance_dsurface_temperature[2], Real* dradiance_dtemperature) { // First temporarily deactivate any existing Adept stack used by the // calling function adept::Stack* caller_stack = adept::active_stack(); if (caller_stack != 0) { caller_stack->deactivate(); } // Within the scope of these curly brackets, another Adept stack // will be used { // Ficticious oxygen channels around 60 GHz: wavelength in m static const Real wavelength[2] = {0.006, 0.0061}; // Mass absorption coefficient of oxygen in m2 kg-1 static const Real mass_abs_coefft[2] = {3.0e-5, 3.0e-3}; // Layer thickness in m static const Real dz = 1000.0; // Density of oxygen in kg m-3 std::vector density_oxygen(n); // Emissivity at a particular microwave wavelength std::vector emissivity(n); // Start a new stack adept::Stack s; // Create local active variables: surface temperature, temperature // and radiance aReal st = surface_temperature; std::vector t(n); aReal r[2]; // Initialize the oxygen density and temperature for (int i = 0; i < n; i++) { Real altitude = i*dz; // Oxygen density uses an assumed volume mixing ratio with air // of 21%, molecular mass of 16 (compared to 29 for air), a // surface air density of 1.2 kg m-3 and an atmospheric scale // height of 8000 m density_oxygen[i] = 1.2*0.21*(16.0/29.0)*exp(-altitude/8000.0); t[i] = temperature[i]; } // Start recording derivative information s.new_recording(); // Loop through the two channels for (int ichan = 0; ichan < 2; ichan++) { // Compute the emissivity profile for (int i = 0; i < n; i++) { emissivity[i] = 1.0-exp(-density_oxygen[i]*mass_abs_coefft[ichan]*dz); } // Simulate the radiance r[ichan] = simulate_radiance_private(n, wavelength[ichan], &emissivity[0], st, &t[0]); // Copy the aReal variable to the Real variable radiance[ichan] = 
r[ichan].value(); } // Declare independent (x) and dependent (y) variables for // Jacobian matrix s.independent(st); s.independent(&t[0], n); s.dependent(r, 2); // Compute Jacobian matrix std::vector jacobian((n+1)*2); s.jacobian(&jacobian[0]); // Copy elements of Jacobian matrix into the calling arrays for (int ichan = 0; ichan < 2; ichan++) { dradiance_dsurface_temperature[ichan] = jacobian[ichan]; for (int i = 0; i < n; i++) { dradiance_dtemperature[i*2+ichan] = jacobian[2+i*2+ichan]; } } // At the following curly bracket, the local Adept stack will be // destructed } // Reactivate the Adept stack of the calling function if (caller_stack != 0) { caller_stack->activate(); } } ================================================ FILE: test/simulate_radiances.h ================================================ /* simulate_radiances.h - a function taking inactive arguments that returns also Jacobian matrices Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #include void simulate_radiances(int n, // Size of temperature array // Input variables: adept::Real surface_temperature, const adept::Real* temperature, // Output variables: adept::Real radiance[2], // Output Jacobians: adept::Real dradiance_dsurface_temperature[2], adept::Real* dradiance_dtemperature); ================================================ FILE: test/state.cpp ================================================ /* state.cpp - An object-oriented interface to an Adept-based minimizer Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ // Note that this implementation uses the GNU Scientific Library (GSL) // to provide the quasi-Newton minimization capability #include #include #include "state.h" // C functions needed by GSL // Return function value given a vector of state variables x extern "C" double my_function_value(const gsl_vector* x, void* params) { State* state = reinterpret_cast(params); return state->calc_function_value(x->data); } // Return gradient of function with respect to each state variable x extern "C" void my_function_gradient(const gsl_vector* x, void* params, gsl_vector* gradJ) { State* state = reinterpret_cast(params); state->calc_function_value_and_gradient(x->data, gradJ->data); } // Return both function and its gradient extern "C" void my_function_value_and_gradient(const gsl_vector* x, void* params, double* J, gsl_vector* gradJ) { State* state = reinterpret_cast(params); *J = state->calc_function_value_and_gradient(x->data, gradJ->data); } using adept::adouble; // "State" member function for returning the value of the function; it // does this by calling the underlying calc_function_value(const // adouble&) function, which is defined in // rosenbrock_banana_function.cpp. Since the gradient is not // required, the recording of automatic differentiation is "paused" // while this function is called. 
double State::calc_function_value(const double* x) { stack_.pause_recording(); for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i]; double result = value(calc_function_value(&active_x_[0])); stack_.continue_recording(); return result; } // Member function for returning both the value of the function and // its gradient - here Adept is used to compute the gradient double State::calc_function_value_and_gradient(const double* x, double* dJ_dx) { for (unsigned int i = 0; i < nx(); ++i) active_x_[i] = x[i]; stack_.new_recording(); adouble J = calc_function_value(&active_x_[0]); J.set_gradient(1.0); stack_.compute_adjoint(); adept::get_gradients(&active_x_[0], nx(), dJ_dx); return value(J); } // Minimize the function, returning true if minimization successful, // false otherwise bool State::minimize() { // Minimizer settings const double initial_step_size = 0.01; const double line_search_tolerance = 1.0e-4; const double converged_gradient_norm = 1.0e-3; // Use the "limited-memory BFGS" quasi-Newton minimizer const gsl_multimin_fdfminimizer_type* minimizer_type = gsl_multimin_fdfminimizer_vector_bfgs2; // Declare and populate structure containing function pointers gsl_multimin_function_fdf my_function; my_function.n = nx(); my_function.f = my_function_value; my_function.df = my_function_gradient; my_function.fdf = my_function_value_and_gradient; my_function.params = reinterpret_cast(this); // Set initial state variables using GSL's vector type: use -5.0 for // every value gsl_vector *x; x = gsl_vector_alloc(nx()); for (unsigned int i = 0; i < nx(); ++i) gsl_vector_set(x, i, -5.0); // Configure the minimizer, and call function once gsl_multimin_fdfminimizer* minimizer = gsl_multimin_fdfminimizer_alloc(minimizer_type, nx()); gsl_multimin_fdfminimizer_set(minimizer, &my_function, x, initial_step_size, line_search_tolerance); // Print out the result of the first function call with the initial // state std::cout << "Initial state: x = ["; for (unsigned int i = 0; i < 
nx(); i++) { std::cout << active_x_[i].value() << " "; } std::cout << "], cost_function = " << minimizer->f << "\n"; // Begin loop size_t iter = 0; int status; do { ++iter; // Perform one iteration status = gsl_multimin_fdfminimizer_iterate(minimizer); // Quit loop if iteration failed if (status != GSL_SUCCESS) break; // Test for convergence status = gsl_multimin_test_gradient(minimizer->gradient, converged_gradient_norm); // Print out limited number of state variables from this // iteration, and the corresponding cost function std::cout << "Iteration " << iter << ": x = ["; for (unsigned int i = 0; i < nx(); i++) { std::cout << active_x_[i].value() << " "; if (i >= 5) { std::cout << "..."; break; } } std::cout << "], cost_function = " << minimizer->f << "\n"; } while (status == GSL_CONTINUE && iter < 1000); // Free memory gsl_multimin_fdfminimizer_free(minimizer); gsl_vector_free(x); // Return true if successfully minimized function, false otherwise if (status == GSL_SUCCESS) { std::cout << "Minimum found after " << iter << " iterations\n"; return true; } else { std::cout << "Minimizer failed after " << iter << " iterations: " << gsl_strerror(status) << "\n"; return false; } } // Enquiry function to return the current value of the state // variables, called after minimize() has been run. void State::x(std::vector& x_out) const { x_out.resize(nx()); for (unsigned int i = 0; i < nx(); i++) { x_out[i] = active_x_[i].value(); } } ================================================ FILE: test/state.h ================================================ /* state.h - An object-oriented interface to an Adept-based minimizer Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #ifndef STATE_H #define STATE_H 1 #include #include "adept.h" class State { public: // Construct a state with n state variables State(int n) { active_x_.resize(n); } // Minimize the function, returning true if minimization // successful, false otherwise bool minimize(); // Get copy of state variables after minimization void x(std::vector& x_out) const; // For input state variables x, compute the function J(x) and // return it double calc_function_value(const double* x); // For input state variables x, compute function and put its // gradient in dJ_dx double calc_function_value_and_gradient(const double* x, double* dJ_dx); // Return the size of the state vector unsigned int nx() const { return active_x_.size(); } protected: // Active version of the function: the algorithm is contained in // the definition of this function (in // rosenbrock_banana_function.cpp) adept::adouble calc_function_value(const adept::adouble* x); // DATA adept::Stack stack_; // Adept stack object std::vector active_x_; // Active state variables }; #endif ================================================ FILE: test/test_adept.cpp ================================================ /* test_adept.cpp - Demonstration of basic features of Adept Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #include #include "adept.h" // Provide function prototype for "algorithm"; see algorithm.cpp for // the contents of the function #include "algorithm.h" int main(int argc, char** argv) { using adept::adouble; using adept::Real; // Start an Adept stack before the first adouble object is // constructed adept::Stack s; adouble x[2]; // Our independent variables adouble y; // Our dependent variable // Set the values of x x[0] = 2.0; x[1] = 3.0; // PART 1: NUMERICAL ADJOINT std::cout << "*** Computing numerical adjoint ***\n\n"; // We will provide an estimate of the adjoints by perturbing the // inputs by a small amount adouble x_perturbed[2]; // Perturbed independent variables // This version of the code uses the same algorithm function that // takes adouble arguments for doing the numerical adjoint, even // though we are not doing automatic differentiation. To make it // faster, we can turn off the recording of derivative information // using the pause_recording function. This only works if all code // has been compiled with the -DADEPT_RECORDING_PAUSABLE flag; // otherwise it does nothing (so the program will still run // correctly, but will be less efficient). Note that another // approach if you want to call a function several times, sometimes // with automatic differentiation and sometimes without, is // demonstrated in // test_adept_with_without_automatic_differentiation.cpp. 
s.pause_recording(); // We will compare the Adept result to a numerically computed // adjoint, so define the perturbation size double dx = 1.0e-5; // Run the algorithm y = algorithm(x); // Now perturb x[0] and x[1] in turn and get a numerical estimate of // the gradient x_perturbed[0] = x[0]+dx; x_perturbed[1] = x[1]; double dy_dx0 = adept::value((algorithm(x_perturbed)-y)/dx); x_perturbed[0] = x[0]; x_perturbed[1] = x[1]+dx; double dy_dx1 = adept::value((algorithm(x_perturbed)-y)/dx); // Turn the recording of deriviative information back on s.continue_recording(); // Print information about the data held in the stack std::cout << "Stack status after numerical adjoint (if recording was successfully\n" << "paused then the number of operations should be zero):\n" << s; // Print memory information std::cout << "Memory usage: " << s.memory() << " bytes\n\n"; // PART 2: REVERSE-MODE AUTOMATIC DIFFERENTIATION // Now we use Adept to do the automatic differentiation std::cout << "*** Computing adjoint using automatic differentiation ***\n\n"; // Start a new recording of derivative statements; note that this // must be done after the independent variables x[0] and x[1] are // defined and after they have been given their initial values s.new_recording(); // Run the algorithm again y = algorithm(x); // Print information about the data held in the stack std::cout << "Stack status after algorithm run but adjoint not yet computed:\n" << s; // Print memory information std::cout << "Memory usage: " << s.memory() << " bytes\n\n"; // If we set the adjoint of the dependent variable to 1 then the // resulting adjoints of the independent variables after // reverse-mode automatic differentiation will be comparable to the // outputs of the numerical differentiation y.set_gradient(1.0); // Print out some diagnostic information std::cout << "List of derivative statements:\n"; s.print_statements(); std::cout << "\n"; std::cout << "Initial list of gradients:\n"; s.print_gradients(); 
std::cout << "\n"; // Run the adjoint algorithm (reverse-mode differentiation) s.reverse(); // Some more diagnostic information std::cout << "Final list of gradients:\n"; s.print_gradients(); std::cout << "\n"; // Extract the adjoints of the independent variables double x0_ad = 0, x1_ad = 0; x[0].get_gradient(x0_ad); x[1].get_gradient(x1_ad); // PART 3: JACOBIAN COMPUTATION // Here we use the same recording to compute the Jacobian matrix std::cout << "*** Computing Jacobian matrix ***\n\n"; s.independent(x, 2); // Declare independents s.dependent(y); // Declare dependents Real jac[2]; // Where the Jacobian will be stored s.jacobian(jac); // Compute Jacobian // PART 4: PRINT OUT RESULTS // Print information about the data held in the stack std::cout << "Stack status after adjoint and Jacobian computed:\n" << s; // Print memory information std::cout << "Memory usage: " << s.memory() << " bytes\n\n"; std::cout << "Result of forward algorithm:\n"; std::cout << " y = " << y.value() << "\n"; std::cout << "Comparison of gradients:\n"; std::cout << " dy_dx0[numerical] = " << dy_dx0 << "\n"; std::cout << " dy_dx0[adjoint] = " << x0_ad << "\n"; std::cout << " dy_dx0[jacobian] = " << jac[0] << "\n"; std::cout << " dy_dx1[numerical] = " << dy_dx1 << "\n"; std::cout << " dy_dx1[adjoint] = " << x1_ad << "\n"; std::cout << " dy_dx1[jacobian] = " << jac[1] << "\n"; std::cout << "\nNote that the numerical gradients are less accurate since they use\n" << "a finite difference and are also succeptible to round-off error.\n"; return 0; } ================================================ FILE: test/test_adept_with_and_without_ad.cpp ================================================ /* test_adept_with_and_without_ad.cpp Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ // Demonstration of the use of Adept with code (in this case, // algorithm.cpp) that has been compiled twice, once with automatic // differentiation enabled (the default) and once with it disabled // (using -DADEPT_NO_AUTOMATIC_DIFFERENTIATION) to provide a faster // version of a function that works with double rather than adouble // objects. #include #include "adept.h" // Provide function prototypes for "algorithm"; see algorithm.cpp for // the contents of the function #include "algorithm_with_and_without_ad.h" // Simple demonstration of automatic differentiation using Adept int main(int argc, char** argv) { using adept::adouble; using adept::Real; // Start an Adept stack before the first adouble object is // constructed adept::Stack s; adouble x[2]; // Our independent variables adouble y; // Our dependent variable // Set the values of x x[0] = 2.0; x[1] = 3.0; // PART 1: NUMERICAL ADJOINT std::cout << "*** Computing numerical adjoint ***\n\n"; // We will compare the Adept result to a numerically computed // adjoint, so define the perturbation size double dx = 1.0e-5; // Initialize a inactive version of x as double rather than adouble // variables double x_r[2]; x_r[0] = x[0].value(); x_r[1] = x[1].value(); // Run the original version of the algorithm that takes real // arguments; this was compiled from algorithm.cpp using the // -DADEPT_NO_AUTOMATIC_DIFFERENTIATION flag to produce the // algorithm_noad.o object file double y_real = algorithm(x_r); // Now perturb x[0] and x[1] in turn and get a numerical estimate of // the gradient x_r[0] = x[0].value()+dx; x_r[1] = x[1].value(); double dy_dx0 = (algorithm(x_r)-y_real)/dx; x_r[0] = x[0].value(); x_r[1] = x[1].value()+dx; double dy_dx1 = (algorithm(x_r)-y_real)/dx; // Print information about the data held in the stack std::cout << "Stack status after numerical adjoint (number of operations should be zero):\n" << s << "\n"; // PART 2: REVERSE-MODE AUTOMATIC DIFFERENTIATION std::cout << "*** Computing adjoint 
using automatic differentiation ***\n\n"; // Start a new recording of derivative statements (note that this // must be done after the independent variables x[0] and x[1] are // initialized s.new_recording(); // Now use Adept to do it - first run the algorithm overloaded for // adouble arguments y = algorithm(x); // Print information about the data held in the stack std::cout << "Stack status after algorithm run but adjoint not yet computed:\n" << s << "\n"; // If we set the adjoint of the dependent variable to 1 then the // resulting adjoints of the independent variables after // reverse-mode automatic differentiation will be comparable to the // outputs of the numerical differentiation y.set_gradient(1.0); // Print out some diagnostic information std::cout << "List of derivative statements:\n"; s.print_statements(); std::cout << "\n"; std::cout << "Initial list of gradients:\n"; s.print_gradients(); std::cout << "\n"; // Run the adjoint algorithm (reverse-mode differentiation) s.reverse(); std::cout << "Final list of gradients:\n"; s.print_gradients(); std::cout << "\n"; // Extract the adjoints of the independent variables double x0_ad = 0, x1_ad = 0; x[0].get_gradient(x0_ad); x[1].get_gradient(x1_ad); // PART 3: JACOBIAN COMPUTATION // Here we use the same recording to compute the Jacobian matrix std::cout << "*** Computing Jacobian matrix ***\n\n"; s.independent(x, 2); // Declare independents s.dependent(y); // Declare dependents Real jac[2]; // Jacobian data must be of type "Real" s.jacobian(jac); // Compute Jacobian // PART 4: PRINT OUT RESULT // Print information about the data held in the stack std::cout << "Stack status after adjoint and Jacobian computed:\n" << s << "\n"; // Print memory information std::cout << "Memory usage: " << s.memory() << " bytes\n\n"; std::cout << "Result of forward algorithm:\n"; std::cout << " y[from algorithm taking double arguments] = " << y_real << "\n"; std::cout << " y[from algorithm taking adouble arguments] = " << 
y.value() << "\n\n"; std::cout << "Comparison of gradients:\n"; std::cout << " dy_dx0[numerical] = " << dy_dx0 << "\n"; std::cout << " dy_dx0[adjoint] = " << x0_ad << "\n"; std::cout << " dy_dx0[jacobian] = " << jac[0] << "\n"; std::cout << " dy_dx1[numerical] = " << dy_dx1 << "\n"; std::cout << " dy_dx1[adjoint] = " << x1_ad << "\n"; std::cout << " dy_dx1[jacobian] = " << jac[1] << "\n"; std::cout << "\nNote that the numerical gradients are less accurate since they use\n" << "a finite difference and are also succeptible to round-off error.\n"; return 0; } ================================================ FILE: test/test_array_derivatives.cpp ================================================ /* test_array_derivatives.cpp - Test derivatives of array expressions Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #include // Arbitrary algorithm converting array of general type A to scalar of // type S, which may be active or passive template void algorithm(const A& x, S& y) { using namespace adept; A tmp; intVector index(2); index << 1, 0; tmp = atan2((exp(x) * x), spread<0>(x(index,1),2)) / x(0,0); y = sum(tmp); } int main(int argc, const char** argv) { using namespace adept; Stack stack; // Matrix dimension static const int N = 2; static const Real MAX_FRAC_ERR = 1.0e-5; // Perturbation size for numerical calculation Real dx = 1.0e-6; if (sizeof(Real) < 8) { // Single precision only works with larger perturbations dx = 1.0e-4; } // Maximum fractional error Real max_frac_err; bool error_too_large = false; // Input data Matrix X(N,N); X << 2, 3, 5, 7; // Numerical calculation std::cout << "NUMERICAL CALCULATION\n"; Matrix dJ_dx_num(N,N); { Real J; algorithm(X, J); std::cout << "J = " << J << "\n"; for (int i = 0; i < N; ++i) { for (int j = 0; j < N; ++j) { Matrix Xpert(N,N); Xpert = X; Xpert(i,j) += dx; Real Jpert; algorithm(Xpert, Jpert); dJ_dx_num(i,j) = (Jpert - J) / dx; } } } std::cout << "dJ_dx_num = " << dJ_dx_num << "\n"; std::cout << "\nNUMERICAL CALCULATION WITH \"FixedArray\"\n"; Matrix22 dJ_dx_num_FixedArray; { Real J; algorithm(X, J); std::cout << "J = " << J << "\n"; for (int i = 0; i < N; ++i) { for (int j = 0; j < N; ++j) { Matrix22 Xpert = X; Xpert(i,j) += dx; Real Jpert; algorithm(Xpert, Jpert); dJ_dx_num_FixedArray(i,j) = (Jpert - J) / dx; } } } std::cout << "dJ_dx_num_FixedArray = " << dJ_dx_num_FixedArray << "\n"; // Adept calculation with aArray std::cout << "\nADEPT CALCULATION WITH \"aArray\"\n"; Matrix dJ_dx_adept_Array(N,N); { aMatrix aX = X; stack.new_recording(); aReal aJ; algorithm(aX, aJ); std::cout << "J = " << aJ << "\n"; aJ.set_gradient(1.0); stack.reverse(); dJ_dx_adept_Array = aX.get_gradient(); } std::cout << "dJ_dx_adept_Array = " << dJ_dx_adept_Array << "\n"; max_frac_err = maxval(abs(dJ_dx_adept_Array-dJ_dx_num)/dJ_dx_num); if 
(max_frac_err <= MAX_FRAC_ERR) { std::cout << "max fractional error = " << max_frac_err << ": PASSED\n"; } else { std::cout << "max fractional error = " << max_frac_err << ": FAILED\n"; error_too_large = true; } // Adept calculation with aFixedArray std::cout << "\nADEPT CALCULATION WITH \"aFixedArray\"\n"; Matrix dJ_dx_adept_FixedArray; { aMatrix22 aX = X; stack.new_recording(); aReal aJ; algorithm(aX, aJ); std::cout << "J = " << aJ << "\n"; aJ.set_gradient(1.0); stack.reverse(); dJ_dx_adept_FixedArray = aX.get_gradient(); } std::cout << "dJ_dx_adept_FixedArray = " << dJ_dx_adept_FixedArray << "\n"; max_frac_err = maxval(abs(dJ_dx_adept_FixedArray-dJ_dx_num)/dJ_dx_num); if (max_frac_err <= MAX_FRAC_ERR) { std::cout << "max fractional error = " << max_frac_err << ": PASSED\n"; } else { std::cout << "max fractional error = " << max_frac_err << ": FAILED\n"; error_too_large = true; } // Adept forward calculation with aArray: four (NxN) separate // calculations are required to compute the derivative with respect // to the four inputs. 
std::cout << "\nADEPT FORWARD CALCULATION WITH \"aArray\"\n"; Matrix dJ_dx_adept_forward_Array(N,N); { aMatrix aX = X; stack.new_recording(); aReal aJ; algorithm(aX, aJ); std::cout << "J = " << aJ << "\n"; Matrix X_tl(N,N); X_tl=0.0; X_tl(0,0) = 1.0; aX.set_gradient(X_tl); stack.forward(); dJ_dx_adept_forward_Array(0,0) = aJ.get_gradient(); stack.clear_gradients(); X_tl=0.0; X_tl(0,1) = 1.0; aX.set_gradient(X_tl); stack.forward(); dJ_dx_adept_forward_Array(0,1) = aJ.get_gradient(); stack.clear_gradients(); X_tl=0.0; X_tl(1,0) = 1.0; aX.set_gradient(X_tl); stack.forward(); dJ_dx_adept_forward_Array(1,0) = aJ.get_gradient(); stack.clear_gradients(); X_tl=0.0; X_tl(1,1) = 1.0; aX.set_gradient(X_tl); stack.forward(); dJ_dx_adept_forward_Array(1,1) = aJ.get_gradient(); } std::cout << "dJ_dx_adept_forward_Array = " << dJ_dx_adept_forward_Array << "\n"; max_frac_err = maxval(abs(dJ_dx_adept_forward_Array-dJ_dx_num)/dJ_dx_num); if (max_frac_err <= MAX_FRAC_ERR) { std::cout << "max fractional error = " << max_frac_err << ": PASSED\n"; } else { std::cout << "max fractional error = " << max_frac_err << ": FAILED\n"; error_too_large = true; } std::cout << "\n"; if (error_too_large) { std::cerr << "*** Error: fractional error in the derivatives of some configurations too large\n"; if (sizeof(Real) < 8) { std::cerr << "*** (but you are using less than double precision so it is not surprising)\n"; } return 1; } else { return 0; } } ================================================ FILE: test/test_array_speed.cpp ================================================ #include #define ADEPT_NO_AUTOMATIC_DIFFERENTIATION #define ADEPT_REAL_TYPE_SIZE 4 #include #include "Timer.h" #define ASSIGN = #define WARMUP_OPERATOR + exp #define OPERATOR + fastexp //#define SUFFIX_OP + 0.5 #define SUFFIX_OP using namespace adept; int main() { Timer timer; timer.print_on_exit(); int n = 128; static const int rep = 10000; // static const int rep = 10; std::cout << "Packet::size = " << 
internal::Packet::size << "\n"; Stack stack; aMatrix M(n,n), P(n,n), Q(n,n); // Array<2,aReal,false> M(n,n), P(n,n), Q(n,n); aReal Mc[n][n], Pc[n][n], Qc[n][n]; for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { P(i,j) = Pc[i][j] = 0.01 * (i-j); Q(i,j) = Qc[i][j] = 0.1 * (j+1); M(i,j) = Mc[i][j] = 0.0; } } int t_c_style_w = timer.new_activity("C-style for loops (warm-up)"); int t_c_style = timer.new_activity("C-style for loops"); int t_adept_w = timer.new_activity("Adept (warm-up)"); int t_adept = timer.new_activity("Adept"); int t_adept_container_w = timer.new_activity("Adept container only (warm-up)"); int t_adept_container = timer.new_activity("Adept container only"); #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION int t_jacobian_w = timer.new_activity("Jacobian (warm-up)"); int t_jacobian = timer.new_activity("Jacobian"); int t_jacobian_array_w = timer.new_activity("Jacobian array-op (warm-up)"); int t_jacobian_array = timer.new_activity("Jacobian array-op"); #endif stack.new_recording(); timer.start(t_c_style_w); for (int irep = 0; irep < rep; ++irep) { for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { Mc[i][j] ASSIGN Pc[i][j] WARMUP_OPERATOR (Qc[i][j] SUFFIX_OP); } } } timer.stop(); if (n <= 10) { std::cout << "C-style M = \n"; for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { std::cout << " " << Mc[i][j]; } std::cout << "\n"; } } // std::cout << stack; stack.new_recording(); timer.start(t_c_style); for (int irep = 0; irep < rep; ++irep) { for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { Mc[i][j] ASSIGN Pc[i][j] OPERATOR (Qc[i][j] SUFFIX_OP); } } } timer.stop(); // std::cout << stack; #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION stack.independent(&Pc[0][0], n*n); stack.dependent(&Mc[0][0], n*n); timer.start(t_jacobian_w); Real* jac; jac = new Real[n*n*n*n]; stack.jacobian_forward(jac); timer.stop(); timer.start(t_jacobian); stack.jacobian_forward(jac); timer.stop(); #endif // std::cout << Mc[0][0] << " " << Mc[10][10] << "\n"; 
stack.new_recording(); timer.start(t_adept_w); for (int irep = 0; irep < rep; ++irep) { // M ASSIGN noalias(P WARMUP_OPERATOR (Q SUFFIX_OP)); M ASSIGN P WARMUP_OPERATOR (Q SUFFIX_OP); } timer.stop(); // std::cout << stack; if (n <= 10) { std::cout << "Array-style M = \n"; for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { std::cout << " " << M(i,j); } std::cout << "\n"; } } std::cout << "Alignment offset = " << (P OPERATOR (Q SUFFIX_OP)).alignment_offset() << "\n"; stack.new_recording(); timer.start(t_adept); for (int irep = 0; irep < rep; ++irep) { // M += noalias(P OPERATOR (Q SUFFIX_OP)); M ASSIGN P OPERATOR (Q SUFFIX_OP); } timer.stop(); // std::cout << stack; #ifndef ADEPT_NO_AUTOMATIC_DIFFERENTIATION stack.clear_independents(); stack.clear_dependents(); stack.independent(P); stack.dependent(Q); // stack.independent(P.data(), n*n); // stack.dependent(M.data(), n*n); std::cout << stack; timer.start(t_jacobian_array_w); stack.jacobian_forward(jac); timer.stop(); timer.start(t_jacobian_array); stack.jacobian_forward(jac); timer.stop(); #endif stack.new_recording(); timer.start(t_adept_container_w); for (int irep = 0; irep < rep; ++irep) { for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { M(i,j) ASSIGN P(i,j) WARMUP_OPERATOR (Q(i,j) SUFFIX_OP); } } } timer.stop(); // std::cout << stack; // std::cout << M; stack.new_recording(); timer.start(t_adept_container); for (int irep = 0; irep < rep; ++irep) { for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { M(i,j) ASSIGN P(i,j) OPERATOR (Q(i,j) SUFFIX_OP); } } } timer.stop(); // std::cout << stack; } ================================================ FILE: test/test_arrays.cpp ================================================ /* test_arrays.cpp - Test Adept's array functionality Copyright (C) 2016-2018 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the 
copyright notice and this notice are preserved. This file is offered as-is, without any warranty. This program can be compiled to run in three ways: (1) normal compilation tests inactive arrays, (2) with -DALL_ACTIVE tests active arrays, and (3) "-DALL_ACTIVE -DADEPT_RECORDING_PAUSABLE" tests whether a "paused" recording correctly records nothing to the automatic-differentiation stack. */ #include #include #define ADEPT_BOUNDS_CHECKING 1 #include //#define TRAP_FLOATING_POINT_EXCEPTIONS 1 #ifdef TRAP_FLOATING_POINT_EXCEPTIONS #include #endif // The following controls whether to use active variables or not //#define ALL_ACTIVE 1 //#define MARVEL_STYLE 1 //#define ALL_COMPLEX 1 using namespace adept; int main(int argc, const char** argv) { using namespace adept; #ifdef TRAP_FLOATING_POINT_EXCEPTIONS feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW); #endif #ifdef ALL_ACTIVE #define IsActive true Stack stack; #else #define IsActive false #endif #define HEADING(MESSAGE) \ std::cout << "====================================================================\n" \ << " TESTING " << MESSAGE << "\n" #define COMMA , #define SIMPLE_EVAL(MESSAGE, TYPE, X, INIT, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ try { \ TYPE X; \ if (INIT) { \ X = test. 
X; \ } \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ EXPR; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } catch (const adept::exception& e) { \ std::cout << "*** Failed with: " << e.what() << "\n"; \ if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ else { \ std::cout << "*** Correct behaviour\n"; \ } \ } #define EVAL(MESSAGE, TYPE, X, INIT, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ try { \ TYPE X; \ if (INIT) { \ X = test. X; \ std::cout << #TYPE << " " << #X << " = " << X << "\n"; \ } \ else { \ std::cout << #TYPE << " " << #X << " = " << X << "\n"; \ } \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ EXPR; \ std::cout << "Result: " << #X << " = " << X << "\n"; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } catch (const adept::exception& e) { \ std::cout << "*** Failed with: " << e.what() << "\n"; \ if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ else { \ std::cout << "*** Correct behaviour\n"; \ } \ } #ifdef ALL_ACTIVE #define EVAL2(MESSAGE, TYPEX, X, INITX, TYPEY, Y, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ try { \ TYPEX X; \ if (INITX) { \ X = test. X; \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ else { \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ TYPEY Y; Y = test. 
Y; \ std::cout << #TYPEY << " " << #Y << " = " << Y << "\n"; \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ int nop=stack.n_operations(); \ EXPR; \ std::cout << "Result: " << #X << " = " << X << "\n"; \ std::cout << "Differential operations: " << stack.n_operations()-nop << "\n"; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } catch (const adept::exception& e) { \ std::cout << "*** Failed with: " << e.what() << "\n"; \ if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ else { \ std::cout << "*** Correct behaviour\n"; \ } \ } #else #define EVAL2(MESSAGE, TYPEX, X, INITX, TYPEY, Y, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ try { \ TYPEX X; \ if (INITX) { \ X = test. X; \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ else { \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ TYPEY Y; Y = test. Y; \ std::cout << #TYPEY << " " << #Y << " = " << Y << "\n"; \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ EXPR; \ std::cout << "Result: " << #X << " = " << X << "\n"; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } catch (const adept::exception& e) { \ std::cout << "*** Failed with: " << e.what() << "\n"; \ if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ else { \ std::cout << "*** Correct behaviour\n"; \ } \ } #endif #define EVAL3(MESSAGE, TYPEX, X, INITX, TYPEY, Y, TYPEZ, Z, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ try { \ TYPEX X; \ if (INITX) { \ X = test. X; \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ else { \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ TYPEY Y; Y.link( test. 
Y ); \ TYPEZ Z; Z.link( test. Z ); \ std::cout << #TYPEY << " " << #Y << " = " << Y << "\n"; \ std::cout << #TYPEZ << " " << #Z << " = " << Z << "\n"; \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ EXPR; \ std::cout << "Result: " << #X << " = " << X << "\n"; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } catch (const adept::exception& e) { \ std::cout << "*** Failed with: " << e.what() << "\n"; \ if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ else { \ std::cout << "*** Correct behaviour\n"; \ } \ } #define EVAL_NO_TRAP(MESSAGE, TYPE, X, INIT, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ { \ TYPE X; \ if (INIT) { \ X = test. X; \ std::cout << #TYPE << " " << #X << " = " << X << "\n"; \ } \ else { \ std::cout << #TYPE << " " << #X << " = " << X << "\n"; \ } \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ EXPR; \ std::cout << "Result: " << #X << " = " << X << "\n"; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } #define EVAL2_NO_TRAP(MESSAGE, TYPEX, X, INITX, TYPEY, Y, EXPR) \ std::cout << "--------------------------------------------------------------------\n" \ << "### " << MESSAGE << "\n### " << #EXPR << "\n"; \ { \ TYPEX X; \ if (INITX) { \ X = test. X; \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ else { \ std::cout << #TYPEX << " " << #X << " = " << X << "\n"; \ } \ TYPEY Y; Y = test. 
Y; \ std::cout << #TYPEY << " " << #Y << " = " << Y << "\n"; \ std::cout << "Evaluating " << #EXPR << "\n"; \ std::cout.flush(); \ EXPR; \ std::cout << "Result: " << #X << " = " << X << "\n"; \ if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n"; \ anomalous_results++; \ } \ } #ifndef ALL_COMPLEX #ifdef ALL_ACTIVE #ifndef MARVEL_STYLE typedef aReal myReal; typedef aMatrix myMatrix; typedef aVector myVector; typedef aSymmMatrix mySymmMatrix; //typedef aSquareMatrix mySymmMatrix; typedef aDiagMatrix myDiagMatrix; typedef aTridiagMatrix myTridiagMatrix; typedef aLowerMatrix myLowerMatrix; typedef aUpperMatrix myUpperMatrix; typedef SpecialMatrix,true> myOddBandMatrix; typedef aArray3D myArray3D; #else typedef aReal myReal; typedef Array<2,aReal,false> myMatrix; typedef Array<1,aReal,false> myVector; typedef SpecialMatrix,false> mySymmMatrix; typedef SpecialMatrix,false> myDiagMatrix; typedef SpecialMatrix,false> myTridiagMatrix; typedef SpecialMatrix, false> myLowerMatrix; typedef SpecialMatrix, false> myUpperMatrix; typedef SpecialMatrix,false> myOddBandMatrix; #endif #else typedef Real myReal; typedef Matrix myMatrix; typedef Vector myVector; typedef Array3D myArray3D; typedef SymmMatrix mySymmMatrix; //typedef SquareMatrix mySymmMatrix; typedef DiagMatrix myDiagMatrix; typedef TridiagMatrix myTridiagMatrix; typedef LowerMatrix myLowerMatrix; typedef UpperMatrix myUpperMatrix; typedef SpecialMatrix,false> myOddBandMatrix; /* typedef SpecialMatrix,false> mySymmMatrix; typedef SpecialMatrix,false> myDiagMatrix; typedef SpecialMatrix,false> myTridiagMatrix; typedef SpecialMatrix,false> myOddBandMatrix; */ #endif #else typedef std::complex myReal; typedef Array<1,std::complex,IsActive> myVector; typedef Array<2,std::complex,IsActive> myMatrix; typedef Array<3,std::complex,IsActive> myArray3D; typedef SpecialMatrix,internal::SquareEngine,IsActive> mySymmMatrix; typedef SpecialMatrix,internal::BandEngine,IsActive> myDiagMatrix; typedef 
SpecialMatrix,internal::BandEngine,IsActive> myTridiagMatrix; typedef SpecialMatrix,internal::LowerEngine, IsActive> myLowerMatrix; typedef SpecialMatrix,internal::UpperEngine, IsActive> myUpperMatrix; typedef SpecialMatrix,internal::BandEngine,IsActive> myOddBandMatrix; #endif struct Test { bool b; boolVector B; int c; myReal x; myVector v, w, vlong; myMatrix M, N; myMatrix Mstrided; myMatrix S; mySymmMatrix O, P; myDiagMatrix D, E; myTridiagMatrix T, TT; myLowerMatrix L, LL; myUpperMatrix U, UU; myOddBandMatrix Q, R; intVector index; myArray3D A; #define MINI_TEST #ifdef MINI_TEST #define DIM1 3 #define DIM2 2 #define DIM3 5 #define DIMLONG 12 #else #define DIM1 12 #define DIM2 10 #define DIM3 15 #define DIMLONG 20 #endif Test() { #ifdef ALL_COMPLEX #define I std::complex(0.0,1.0) #else #define I 0.0 #endif b = false; B.resize(DIM1); B = false; c = 0; x = -2; v.resize(DIM1); vlong.resize(DIMLONG); vlong = linspace(1,DIMLONG,DIMLONG); w.resize(DIM1); M.resize(DIM2,DIM1); myMatrix Mtmp(DIM2*3,DIM1*2); Mstrided.link(Mtmp(stride(0,end,3),stride(0,end,2))); N.resize(DIM2,DIM1); S.resize(DIM1,DIM1); O.resize(DIM1); Q.resize(DIM3); index.resize(DIM2); v(0) = 2.0 + 3.0*I; v(1) = 3; v(2) = 5; w(0) = 7.0 + 4.0*I; w(1) = 11; w(2) = 13; M(0,0) = 2.0 + 3.0*I; M(0,1) = 3; M(0,2) = 5; M(1,0) = 7; M(1,1) = 11; M(1,2) = 13; Mstrided = M; N(0,0) = 17.0+5.0*I; N(0,1) = 19; N(0,2) = 23; N(1,0) = 29; N(1,1) = 31; N(1,2) = 37; S(0,0) = 2.0+3.0*I; S(0,1) = 3; S(0,2) = 5; S(1,0) = 7.0+4.0*I; S(1,1) = 11; S(1,2) = 13; S(2,0) = 17; S(2,1) = 19; S(2,2) = 23; O(0,0) = 7.0+3.0*I; O(1,0) = 2; O(1,1) = 11; O(2,0) = 3; O(2,1) = 5; O(2,2) = 13; P = 14.0 - O; Q.diag_vector(-2) = 1; Q.diag_vector(-1) = 2; Q.diag_vector(0) = 3; Q.diag_vector(1) = 4; D = S; T = S; L = S; U = S; A.resize(DIM2,DIM1,DIM2); A << 2.0+3.0*I, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31,37; index << 1, 0; } }; #ifdef ALL_ACTIVE #ifndef ADEPT_RECORDING_PAUSABLE stack.new_recording(); #else stack.pause_recording(); #endif #endif Test 
test; bool should_fail=false; int anomalous_results=0; std::cout << adept::configuration(); #ifdef ALL_ACTIVE std::cout << "Testing ACTIVE arrays\n"; #else std::cout << "Testing INACTIVE arrays\n"; #endif #ifdef ALL_COMPLEX std::cout << "Testing COMPLEX arrays\n"; #endif HEADING("ARRAY FUNCTIONALITY"); EVAL("Array \"resize\" member function", myMatrix, M, true, M.resize(1,5)); should_fail=true; EVAL("Array \"resize\" with invalid dimensions", myMatrix, M, true, M.resize(1)); should_fail=false; EVAL("Array \"resize\" with \"dimensions\" function", myMatrix, M, true, M.resize(dimensions(4,2))); EVAL("Array \"clear\" member function", myMatrix, M, true, M.clear()); #ifdef ADEPT_CXX11_FEATURES HEADING("INITIALIZER LISTS (C++11 ONLY)"); EVAL("Vector assignment to initializer list from empty", myVector, v, false, v = {1 COMMA 2}); EVAL("Vector assignment to initializer list with underfill", myVector, v, true, v = {1.0 COMMA 2.0}); should_fail = true; EVAL("Vector assignment to initializer list with overfill (SHOULD FAIL)", myVector, v, true, v = {1.0 COMMA 2.0 COMMA 3.0 COMMA 4.0}); should_fail = false; EVAL("Matrix assignment to initializer list from empty", myMatrix, M, false, M = { {1 COMMA 2} COMMA {3 COMMA 4} }); EVAL("Matrix assignment to initializer list with underfill", myMatrix, M, true, M = { {1.0 COMMA 2.0} COMMA {3.0 COMMA 4.0} }); should_fail = true; EVAL("Matrix assignment to initializer list with overfill (SHOULD FAIL)", myMatrix, M, true, M = { {1.0 COMMA 2.0 COMMA 3.0 COMMA 4.0} }); should_fail = false; EVAL("Initializer list in expression", myVector, v, true, v = v + Vector({1.0 COMMA 2.0 COMMA 3.0})); EVAL2("Indexed matrix assigned to initializer list", myMatrix, M, true, intVector, index, M(index,index) = {{1 COMMA 2} COMMA {3 COMMA 4}}); #endif HEADING("BASIC EXPRESSIONS"); EVAL2("Vector assignment to vector from empty", myVector, v, false, myVector, w, v = w); EVAL2("Vector assignment to expression from empty", myVector, v, false, myVector, w, v = 
log(w) + 1.0); /* should_fail=true; EVAL("Vector = operator from empty (SHOULD FAIL)", myVector, v, false, v = 1.0); EVAL("Vector += operator from empty (SHOULD FAIL)", myVector, v, false, v += 1.0); should_fail=false; */ EVAL("Matrix *= operator", myMatrix, M, true, M *= 0.5); EVAL2("Matrix = scalar", myMatrix, M, true, myReal, x, M = x); EVAL2("Matrix = scalar expression", myMatrix, M, true, myReal, x, M = (10.0*x)); #ifndef ALL_COMPLEX HEADING("BASIC FUNCTIONS"); EVAL2("max", myVector, v, true, myVector, w, v = max(v,w/3.0)); EVAL2("min", myVector, v, true, myVector, w, v = min(v,w/3.0)); #endif HEADING("ARRAY SLICING"); EVAL2("Array indexing rvalue", myReal, x, true, myMatrix, M, x = M(1,end-1)); should_fail=true; EVAL2("Array indexing rvalue out of range (SHOULD FAIL)", myReal, x, true, myMatrix, M, x = M(1,3)); should_fail=false; EVAL("Array indexing lvalue", myMatrix, M, true, M(1,end-1) *= -1.0); EVAL2("contiguous subarray rvalue", myVector, v, false, myMatrix, M, v = M(__,end)); EVAL("contiguous subarray lvalue", myMatrix, M, true, M(end-1,__) /= 2.0); EVAL2("contiguous subarray rvalue and lvalue", myMatrix, M, true, myMatrix, N, M(__,1) = N(__,2)); EVAL2("contiguous subarray rvalue using range", myVector, v, false, myMatrix, M, v = 2.0 * M(1,range(1,2))); EVAL2("contiguous subarray lvalue using range", myMatrix, M, true, myVector, v, M(end-1,range(0,1)) = log(v(range(1,2)))); EVAL2("contiguous subarray rvalue using subset", myMatrix, M, false, myMatrix, N, M = 2.0 * N.subset(1,1,1,2)); EVAL("contiguous subarray lvalue using subset", myVector, v, true, v.subset(end-1,end) *= 10.0); EVAL2("regular subarray rvalue", myVector, v, false, myVector, w, v = w(stride(end,0,-1))); EVAL2("regular subarray lvalue", myMatrix, M, true, myVector, w, M(0,stride(0,end,2)) *= w(stride(end,0,-2))); #ifndef ALL_COMPLEX EVAL2("irregular subarray rvalue", myMatrix, M, false, myMatrix, N, M = N(stride(1,0,-1),find(N(0,__)>18))); EVAL("irregular subarray lvalue", myMatrix, M, 
true, M(stride(1,0,-1),find(M(0,__)>4)) = 0); #endif EVAL("slice leading dimension", myMatrix, M, true, M[end] = 0); EVAL("slice two dimensions", myMatrix, M, true, M[end][0] = 0); EVAL2("diag_vector member function as rvalue", myVector, v, false, myMatrix, S, v = diag_vector(S,1)); EVAL2("diag_vector member function as lvalue", myMatrix, S, true, myVector, v, S.diag_vector() += v); EVAL2("diag_matrix member function", myMatrix, S, false, myVector, v, S = v.diag_matrix()); EVAL2("diag_matrix external function", myMatrix, S, false, myVector, v, S = diag_matrix(v)); EVAL2("transpose as rvalue via T member function", myMatrix, N, false, myMatrix, M, N = 2.0 * M.T()); EVAL2("transpose as rvalue via permute member function", myMatrix, N, false, myMatrix, M, N = 2.0 * M.permute(1,0)); EVAL3("matrix indexing (scalar,non-contiguous)", myVector, v, false, myMatrix, N, intVector, index, v = N(1,index)); EVAL3("matrix indexing (non-contiguous,scalar)", myVector, v, false, myMatrix, N, intVector, index, v = N(index,1)); EVAL3("2D arbitrary index as rvalue", myMatrix, M, false, myMatrix, N, intVector, index, M = const_cast(N)(index,index)); EVAL3("2D arbitrary index as lvalue assigned to scalar expression", myMatrix, M, true, myMatrix, N, intVector, index, M(index,index) = 2.0*(myReal)(4.0)); EVAL3("2D arbitrary index as lvalue", myMatrix, M, true, myMatrix, N, intVector, index, M(index,index) = N(__,range(1,2))); EVAL2("2D arbitrary index as lvalue with assign-multiply operator", myMatrix, M, true, intVector, index, M(index,index) *= 10.0); EVAL2("2D arbitrary index as lvalue with aliased right-hand-side", myMatrix, M, true, intVector, index, M(index,index) = M(__,range(0,1))); EVAL2("2D arbitrary index as lvalue with aliased right-hand-side and eval function", myMatrix, M, true, intVector, index, M(index,index) = eval(M(__,range(0,1)))); EVAL2("reshape member function", myMatrix, M, false, myVector, vlong, M >>= vlong.reshape(3,4)); should_fail=true; EVAL2("reshape member 
function with invalid dimensions", myMatrix, M, false, myVector, vlong, M >>= vlong.reshape(5,5)); should_fail=false; EVAL("end/2 indexing", myVector, vlong, true, vlong(range(end/2,end)) = 0.0); EVAL("end/2 indexing", myVector, vlong, true, vlong(range(0,end/2)) = 0.0); EVAL("end/2 indexing", myVector, vlong, true, vlong.subset(end/2,end) = 0.0); HEADING("REDUCTION OPERATIONS"); EVAL2("full sum", myReal, x, true, myMatrix, M, x = sum(M)); EVAL2("full mean", myReal, x, true, myMatrix, M, x = mean(M)); EVAL2("full product", myReal, x, true, myMatrix, M, x = product(M)); EVAL2("full norm2", myReal, x, true, myMatrix, M, x = norm2(M)); #ifndef ALL_COMPLEX EVAL2("full maxval", myReal, x, true, myMatrix, M, x = maxval(M)); EVAL2("full minval", myReal, x, true, myMatrix, M, x = minval(-M)); #endif EVAL2("1-dimension sum", myVector, v, true, myMatrix, M, v += sum(M,0)); EVAL2("1-dimension mean", myVector, v, false, myMatrix, M, v = mean(M*M,1)); EVAL2("1-dimension product", myVector, v, false, myMatrix, M, v = product(M,1)); EVAL2("1-dimension norm2", myVector, v, false, myMatrix, M, v = norm2(M,1)); // EVAL2("1-dimension sum", myMatrix, M, false, myArray3D, A, M = sum(A,2)); #ifndef ALL_COMPLEX EVAL2("1-dimension maxval", myVector, v, false, myMatrix, M, v = maxval(M,1)); EVAL2("1-dimension minval", myVector, v, false, myMatrix, M, v = minval(M,1)); EVAL2("dot product", myReal, x, true, myVector, w, x = dot_product(w,w(stride(end,0,-1)))); EVAL2("dot product on expressions", myReal, x, true, myVector, w, x = dot_product(2.0*w,w(stride(end,0,-1))+1.0)); EVAL2("1D interpolation", myVector, v, true, myVector, w, v = interp(value(v), w, Vector(value(w)/2.0))); EVAL2("1D clamped interpolation", myVector, v, true, myVector, w, v = interp(value(v), w, value(w)/2.0, ADEPT_EXTRAPOLATE_CLAMP)); #ifndef ALL_ACTIVE EVAL2("1D interpolation of matrix", myMatrix, M, true, myVector, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0)); EVAL2("1D clamped interpolation of matrix", 
myMatrix, M, true, myVector, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0, ADEPT_EXTRAPOLATE_CLAMP)); #endif EVAL2("all reduction", bool, b, true, myMatrix, M, b = all(M > 8.0)); EVAL2("any reduction", bool, b, true, myMatrix, M, b = any(M > 8.0)); EVAL2("count reduction", int, c, true, myMatrix, M, c = count(M > 8.0)); EVAL2("1-dimension all reduction", boolVector, B, false, myMatrix, M, B = all(M > 8.0, 1)); EVAL2("1-dimension any reduction", boolVector, B, false, myMatrix, M, B = any(M > 8.0, 1)); EVAL2("1-dimension count reduction", intVector, index, false, myMatrix, M, index = count(M > 8.0, 1)); HEADING("CONDITIONAL OPERATIONS"); EVAL2("where construct, scalar right-hand-side", myMatrix, M, true, myMatrix, N, M.where(N > 20) = 0); EVAL2("where construct, expression right-hand-side", myMatrix, M, true, myMatrix, N, M.where(N > 20) = -N); EVAL2("where construct, scalar either-or right-hand-side", myMatrix, M, true, myMatrix, N, M.where(N > 20) = either_or(0,1)); EVAL2("where construct, expression either-or right-hand-side", myMatrix, M, true, myMatrix, N, M.where(N > 20) = either_or(-N,N)); EVAL_NO_TRAP("find construct, scalar right-hand-side", myVector, v, true, v(find(v > 3.5)) = 0); EVAL("find construct, expression right-hand-side", myVector, v, true, v(find(v > 3.5)) = -v(range(end,end))); EVAL("find construct, multiply-assign right-hand-side", myVector, v, true, v(find(v != 5.0)) *= 10.0); #endif HEADING("SPECIAL SQUARE MATRICES"); EVAL("SymmMatrix \"resize\" member function", mySymmMatrix, O, true, O.resize(5)); should_fail = true; EVAL("SymmMatrix \"resize\" with invalid dimensions", mySymmMatrix, O, true, O.resize(4,5)); should_fail = false; EVAL("SymmMatrix \"clear\" member function", mySymmMatrix, O, true, O.clear()); EVAL2("SymmMatrix assign from dense matrix", mySymmMatrix, O, false, myMatrix, S, O = S); EVAL2("DiagMatrix assign from dense matrix", myDiagMatrix, D, false, myMatrix, S, D = S); EVAL2("TridiagMatrix assign from dense matrix", 
myTridiagMatrix, T, false, myMatrix, S, T = S); EVAL2("LowerMatrix assign from dense matrix", myLowerMatrix, L, false, myMatrix, S, L = S); EVAL2("UpperMatrix assign from dense matrix", myUpperMatrix, U, false, myMatrix, S, U = S); EVAL("SymmMatrix += operator", mySymmMatrix, O, true, O += 3.0); EVAL("DiagMatrix += operator", myDiagMatrix, D, true, D += 3.0); EVAL("TridiagMatrix += operator", myTridiagMatrix, T, true, T += 3.0); EVAL("LowerMatrix += operator", myLowerMatrix, L, true, L += 3.0); EVAL("UpperMatrix += operator", myUpperMatrix, U, true, U += 3.0); EVAL2("SymmMatrix as rvalue", myMatrix, M, false, mySymmMatrix, O, M = O); EVAL2("DiagMatrix as rvalue", myMatrix, M, false, myDiagMatrix, D, M = D); EVAL2("TridiagMatrix as rvalue", myMatrix, M, false, myTridiagMatrix, T, M = T); EVAL2("LowerMatrix as rvalue", myMatrix, M, false, myLowerMatrix, L, M = L); EVAL2("UpperMatrix as rvalue", myMatrix, M, false, myUpperMatrix, U, M = U); EVAL("SymmMatrix assign from scalar expression", mySymmMatrix, O, true, O = 2.0*(myReal)(4.0)); EVAL("UpperMatrix assign from scalar expression", myUpperMatrix, U, true, U = 2.0*(myReal)(4.0)); EVAL("SymmMatrix diag_vector member function as lvalue (upper)", mySymmMatrix, O, true, O.diag_vector(1) = 0); EVAL("SymmMatrix diag_vector member function as lvalue (lower)", mySymmMatrix, O, true, O.diag_vector(-2) += 10.0); EVAL("DiagMatrix diag_vector member function as lvalue", myDiagMatrix, D, true, D.diag_vector() = 0.0); should_fail = true; EVAL("DiagMatrix diag_vector member function incorrectly using offdiagonal", myDiagMatrix, D, true, D.diag_vector(1) = 0.0); should_fail = false; EVAL("TridiagMatrix diag_vector member function as lvalue (upper)", myTridiagMatrix, T, true, T.diag_vector(1) += 10.0); EVAL("TridiagMatrix diag_vector member function as lvalue (lower)", myTridiagMatrix, T, true, T.diag_vector(-1) = 0.0); EVAL("LowerMatrix diag_vector member function as lvalue (lower)", myLowerMatrix, L, true, L.diag_vector(-1) = 0.0); 
should_fail = true; EVAL("LowerMatrix diag_vector member function as lvalue (upper)", myLowerMatrix, L, true, L.diag_vector(1) = 0.0); EVAL("UpperMatrix diag_vector member function as lvalue (lower)", myUpperMatrix, U, true, U.diag_vector(-1) = 0.0); should_fail = false; EVAL("UpperMatrix diag_vector member function as lvalue (upper)", myUpperMatrix, U, true, U.diag_vector(1) = 0.0); EVAL("Odd band matrix \"diag_vector\" member function", myOddBandMatrix, Q, true, Q.diag_vector(1) = -1.0); EVAL("Odd band matrix \"diag_vector\" member function", myOddBandMatrix, Q, true, Q.diag_vector(0) = -1.0); EVAL("Odd band matrix \"diag_vector\" member function", myOddBandMatrix, Q, true, Q.diag_vector(-1) = -1.0); EVAL("Odd band matrix \"diag_vector\" member function", myOddBandMatrix, Q, true, Q.diag_vector(-2) = -1.0); EVAL2("Array submatrix_on_diagonal member function", myMatrix, M, false, myMatrix, S, M = S.submatrix_on_diagonal(1,2)); EVAL("Array submatrix_on_diagonal member function as lvalue", myMatrix, S, true, S.submatrix_on_diagonal(0,1) = 0.0); should_fail = true; EVAL2("Array submatrix_on_diagonal member function to non-square matrix", myMatrix, M, false, myMatrix, N, M = N.submatrix_on_diagonal(1,2)); should_fail = false; EVAL2("SymmMatrix submatrix_on_diagonal member function", mySymmMatrix, P, false, mySymmMatrix, O, P = O.submatrix_on_diagonal(1,2)); EVAL2("DiagMatrix submatrix_on_diagonal member function", myDiagMatrix, E, false, myDiagMatrix, D, E = D.submatrix_on_diagonal(1,2)); EVAL2("TridiagMatrix submatrix_on_diagonal member function", myTridiagMatrix, TT, false, myTridiagMatrix, T, TT = T.submatrix_on_diagonal(1,2)); EVAL2("LowerMatrix submatrix_on_diagonal member function", myLowerMatrix, LL, false, myLowerMatrix, L, LL = L.submatrix_on_diagonal(1,2)); EVAL2("UpperMatrix submatrix_on_diagonal member function", myUpperMatrix, UU, false, myUpperMatrix, U, UU = U.submatrix_on_diagonal(1,2)); EVAL2("Odd band matrix submatrix_on_diagonal member function", 
myOddBandMatrix, R, false, myOddBandMatrix, Q, R = Q.submatrix_on_diagonal(1,3)); EVAL("Odd band matrix submatrix_on_diagonal as lvalue", myOddBandMatrix, Q, true, Q.submatrix_on_diagonal(1,3) = -1); EVAL2("SymmMatrix transpose as rvalue via T member function", mySymmMatrix, P, false, mySymmMatrix, O, P = O.T()); EVAL2("DiagMatrix transpose as rvalue via T member function", myDiagMatrix, E, false, myDiagMatrix, D, E = D.T()); EVAL2("TridiagMatrix transpose as rvalue via T member function", myTridiagMatrix, TT, false, myTridiagMatrix, T, TT = T.T()); EVAL2("LowerMatrix transpose as rvalue via T member function", myUpperMatrix, U, false, myLowerMatrix, L, U = L.T()); EVAL2("UpperMatrix transpose as rvalue via T member function", myLowerMatrix, L, false, myUpperMatrix, U, L = U.T()); HEADING("EXPANSION OPERATIONS"); EVAL2("Outer product", myMatrix, M, false, myVector, v, M = outer_product(v,v)); EVAL2("Outer product on indexed array", myMatrix, M, false, myVector, v, M = outer_product(v,v(stride(end,0,-1)))); EVAL2("Outer product on expressions", myMatrix, M, false, myVector, v, M = outer_product(2.0*v,v-1.0)); EVAL2("Vector spread of dimension 0", myMatrix, M, false, myVector, v, M = spread<0>(v,2)); EVAL2("Vector spread of dimension 1", myMatrix, M, false, myVector, v, M = spread<1>(v,2)); EVAL2("Vector spread with expression argument", myMatrix, M, false, myVector, v, M = spread<1>(v*2.0,2)); EVAL2("Matrix spread of dimension 0", myArray3D, A, false, myMatrix, M, A = spread<0>(M,2)); EVAL2("Matrix spread of dimension 1", myArray3D, A, false, myMatrix, M, A = spread<1>(M,2)); EVAL2("Matrix spread of dimension 2", myArray3D, A, false, myMatrix, M, A = spread<2>(M,2)); #ifndef ALL_COMPLEX #ifndef MARVEL_STYLE if (adept::have_matrix_multiplication()) { HEADING("MATRIX MULTIPLICATION"); EVAL3("Matrix-Vector multiplication", myVector, w, false, myMatrix, M, myVector, v, w = M ** v); EVAL3("Matrix-Vector multiplication with strided matrix", myVector, w, false, myMatrix, 
Mstrided, myVector, v, w = Mstrided ** v); EVAL2("Matrix-Matrix multiplication", myMatrix, M, false, myMatrix, N, M = N.T() ** N); EVAL2("Matrix-Matrix multiplication with matmul", myMatrix, M, false, myMatrix, N, M = matmul(N.T(), N)); should_fail = true; EVAL2("Matrix-Matrix multiplication with inner dimension mismatch", myMatrix, M, false, myMatrix, N, M = N ** N); should_fail = false; // TESTING! EVAL2("Matrix-Matrix-Vector multiplication", myVector, v, true, myMatrix, S, v = S ** S ** v); EVAL2("Matrix-Matrix-Vector multiplication", myVector, v, false, myMatrix, S, v = S ** log(S) ** S(0,__)); EVAL2("Vector-Matrix multiplication", myVector, v, true, myMatrix, S, v = v ** S); EVAL2("Vector-Matrix multiplication with matmul", myVector, v, true, myMatrix, S, v = matmul(v, S)); EVAL2("SymmMatrix-Vector multiplication", myVector, v, true, mySymmMatrix, O, v = O ** v); EVAL2("SymmMatrix-Matrix multiplication", myMatrix, S, true, mySymmMatrix, O, S = O ** S); EVAL2("Vector-SymmMatrix multiplication", myVector, v, true, mySymmMatrix, O, v = v ** O); EVAL2("Matrix-SymmMatrix multiplication", myMatrix, M, true, mySymmMatrix, O, M = M ** O); EVAL2("DiagMatrix-Vector multiplication", myVector, v, true, myDiagMatrix, D, v = D ** v); EVAL2("TridiagMatrix-Vector multiplication", myVector, v, true, myTridiagMatrix, T, v = T ** v); EVAL2("TridiagMatrix-Matrix multiplication", myMatrix, S, true, myTridiagMatrix, T, S = T ** S); EVAL2("LowerMatrix-Matrix multiplication", myMatrix, S, true, myLowerMatrix, L, S = L ** S); EVAL2("Vector-TridiagMatrix multiplication", myVector, v, true, myTridiagMatrix, T, v = v ** T); EVAL2("Matrix-TridiagMatrix multiplication", myMatrix, M, true, myTridiagMatrix, T, M = M ** T); } else { std::cout << "NO MATRIX MULTIPLICATION TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\n"; } #ifndef ALL_ACTIVE if (adept::have_linear_algebra()) { HEADING("LINEAR ALGEBRA"); EVAL2("Solving general linear equations Ax=b", myVector, v, true, myMatrix, S, v = 
solve(S,v)); EVAL2("Solving general linear equations Ax=b with expression arguments", myVector, v, true, myMatrix, S, v = solve(S,2*v)); EVAL2("Solving general linear equations AX=B", myMatrix, M, true, myMatrix, S, M.T() = solve(S,M.T())); EVAL2("Solving general linear equations AX=B with expression arguments", myMatrix, M, true, myMatrix, S, M.T() = solve(2.0 * S,2.0 * M.T())); EVAL2("Solving linear equations Ax=b with symmetric A", myVector, v, true, mySymmMatrix, O, v = solve(O,v)); EVAL2("Solving linear equations AX=B with symmetric A", myMatrix, M, true, mySymmMatrix, O, M.T() = solve(O,M.T())); EVAL3("Solving linear equations AX=B with symmetric A and B", myMatrix, S, false, mySymmMatrix, O, mySymmMatrix, P, S = solve(O,P)); EVAL2("Solving linear equations Ax=b with upper-triangular A", myVector, v, true, myUpperMatrix, U, v = solve(U,v)); EVAL2("Invert general matrix", myMatrix, M, false, myMatrix, S, M = inv(S)); EVAL2("Invert symmetric matrix", mySymmMatrix, P, false, mySymmMatrix, O, P = inv(O)); } else { std::cout << "NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\n"; } #else std::cout << "NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ACTIVE ARRAYS NOT YET SUPPORTED\n"; #endif #else std::cout << "NO MATRIX TESTS PERFORMED BECAUSE USING MARVEL-STYLE ACTIVE ARRAYS\n"; #endif #endif HEADING("FILLING ARRAYS"); EVAL("Fill vector with \"<<\"", myVector, v, true, (v << 0.1, 0.2)); should_fail = true; EVAL("Overfill vector with \"<<\"", myVector, v, true, (v << 0.1, 0.2, 0.3, 0.4)); should_fail = false; EVAL("Underfill matrix with \"<<\"", myMatrix, M, true, (M << 0.1, 0.2, 0.3, 0.4, 0.5)); EVAL("Fill matrix with \"<<\"", myMatrix, M, true, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6)); should_fail = true; EVAL("Overfill matrix with \"<<\"", myMatrix, M, true, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)); should_fail = false; EVAL("Fill vector with vectors using \"<<\"", myVector, v, true, v << v(range(1,2)) << 0.1); EVAL2("Fill matrix 
with vector using \"<<\"", myMatrix, M, true, myVector, v, M << 0.1 << 0.2 << 0.3 << v); EVAL2("Fill matrix with vector using \"<<\"", myMatrix, S, true, myVector, v, S << v << v << v); EVAL("Assign array using range", myVector, v, false, v = range(3,6)); HEADING("PRINTING WITH PLAIN STYLE"); adept::set_array_print_style(PRINT_STYLE_PLAIN); SIMPLE_EVAL("Printing empty vector", myVector, v, false, std::cout << v << '\n'); SIMPLE_EVAL("Printing vector", myVector, v, true, std::cout << v << '\n'); SIMPLE_EVAL("Printing matrix", myMatrix, M, true, std::cout << M << '\n'); SIMPLE_EVAL("Printing 3D array", myArray3D, A, true, std::cout << A << '\n'); HEADING("PRINTING WITH CSV STYLE"); adept::set_array_print_style(PRINT_STYLE_CSV); SIMPLE_EVAL("Printing empty vector", myVector, v, false, std::cout << v << '\n'); SIMPLE_EVAL("Printing vector", myVector, v, true, std::cout << v << '\n'); SIMPLE_EVAL("Printing matrix", myMatrix, M, true, std::cout << M << '\n'); SIMPLE_EVAL("Printing 3D array", myArray3D, A, true, std::cout << A << '\n'); HEADING("PRINTING WITH CURLY STYLE"); adept::set_array_print_style(PRINT_STYLE_CURLY); SIMPLE_EVAL("Printing empty vector", myVector, v, false, std::cout << v << '\n'); SIMPLE_EVAL("Printing vector", myVector, v, true, std::cout << v << '\n'); SIMPLE_EVAL("Printing matrix", myMatrix, M, true, std::cout << M << '\n'); SIMPLE_EVAL("Printing 3D array", myArray3D, A, true, std::cout << A << '\n'); HEADING("PRINTING WITH MATLAB STYLE"); adept::set_array_print_style(PRINT_STYLE_MATLAB); SIMPLE_EVAL("Printing empty vector", myVector, v, false, std::cout << v << '\n'); SIMPLE_EVAL("Printing vector", myVector, v, true, std::cout << v << '\n'); SIMPLE_EVAL("Printing matrix", myMatrix, M, true, std::cout << M << '\n'); SIMPLE_EVAL("Printing 3D array", myArray3D, A, true, std::cout << A << '\n'); adept::set_array_print_style(PRINT_STYLE_CURLY); HEADING("EXPRESSION PRINTING"); EVAL("Send expression to standard output", myMatrix, M, true, std::cout << 
M(0,__) + M(1,__) << '\n'); EVAL("Send scalar expression to standard output", myVector, v, true, std::cout << v(0) + v(1) << '\n'); #ifdef ADEPT_BOUNDS_CHECKING HEADING("BOUNDS CHECKING"); should_fail = true; EVAL("Access vector out of bounds", myVector, v, true, v(0) = v(4)); EVAL("Access vector out of bounds", myVector, v, true, v(0) = v(end-4)); EVAL("Access matrix out of bounds", myMatrix, M, true, M(0,0) = M(0,-1)); EVAL("Access matrix out of bounds", myMatrix, M, true, M(0,0) = M(end+1,1)); should_fail = false; #endif std::cout << "====================================================================\n"; #ifdef ALL_ACTIVE std::cout << stack; std::cout << "====================================================================\n"; #endif if (anomalous_results > 0) { std::cout << "*** In terms of run-time errors, there were " << anomalous_results << " incorrect results\n"; } else { std::cout << "In terms of run-time errors, all tests were passed\n"; } #ifdef ALL_ACTIVE #ifdef ADEPT_RECORDING_PAUSABLE if (stack.n_statements() > 1) { std::cout << "*** Stack contains " << stack.n_statements()-1 << " statements and " << stack.n_operations() << " operations but both should be 0 because recording has been paused\n"; return 1; } #endif #endif if (anomalous_results > 0) { return 1; } else { return 0; } } ================================================ FILE: test/test_checkpoint.cpp ================================================ /* test_checkpoint.cpp - Test manual checkpointing of a simulation Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #include #include //#include #include "adept.h" // This header file is in the same directory as adept.h in the Adept // package #include "Timer.h" using adept::adouble; // Number of points in spatial grid of simulation #define NX 100 // "Toon" advection scheme applied to linear advection in a 1D // periodic domain - see Adept paper for details static void toon(int nt, double c, const adouble q_init[NX], adouble q[NX]) { adouble flux[NX-1]; // Fluxes between boxes for (int i=0; i= 0; i--) { stack.new_recording(); toon(nt, dt, q_save[i], q); // This time we use the set of gradients output from the previous // simulation (which can be thought of as dJ/dq_save[i+1]) as // the input gradients for the next adept::set_gradients(q, NX, dJ_dq); // Perform adjoint calculation stack.reverse(); // Extract the next set of gradients (which can be thought of as // dJ/dq_save[i]) and place in dJ_dq ready for the next // iteration adept::get_gradients(q_save[i], NX, dJ_dq); } // Print out the gradients std::cout << "dJ_dq=["; for (int i = 0; i < NX; i++) { std::cout << " " << dJ_dq[i]; nan_appeared = nan_appeared || std::isnan(dJ_dq[i]); } std::cout << "]\n"; std::cout << stack; } timer.stop(); if (nan_appeared) { std::cerr << "*** Error: some NaNs appeared\n"; return 1; } else { return 0; } } ================================================ FILE: test/test_constructors.cpp ================================================ /* test_constructors.cpp - Test Adept's selection of constructors in a range of scenarios Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/

// NOTE(review): the header name of the following #include is missing
// in this copy of the file (apparently lost in extraction) - restore
// it from the upstream source
#include

#define ADEPT_BOUNDS_CHECKING 1
#define ADEPT_VERBOSE_FUNCTIONS
#define ADEPT_NO_ALIAS_CHECKING

// NOTE(review): header name missing here too - restore from upstream
#include

using namespace adept;

// Return the element-wise square of "v", which is taken by const
// reference; prints a marker so that any constructor calls reported
// around it can be attributed to the call
Vector square(const Vector& v) {
  std::cout << " inside function\n";
  return v*v;
}

// Square "v" in place; the argument is a non-const reference so the
// caller's vector is modified
void square_in_place(Vector& v) {
  std::cout << " inside function\n";
  v *= v;
}

// Take "v" by value, negate it, and return the element-wise square
// of the negated vector
Vector square_copy(Vector v) {
  std::cout << " inside function\n";
  v *= -1.0;
  return v*v;
}

// Expands to a comma, allowing brace-enclosed lists containing commas
// to be passed as a single macro argument
#define COMMA ,

// Print MSG and the text of COMMAND, execute COMMAND (which is
// expected to declare X), then print X
#define EVAL_CONSTRUCT(MSG,X,COMMAND) std::cout << "--------------------------------------------------------------------\n" \
    << MSG << "\n"							\
    << #COMMAND << "\n";						\
  COMMAND;								\
  std::cout << #X << " = " << X << "\n"

// Print MSG, X and the text of COMMAND, execute COMMAND, then print
// X again
#define EVAL(MSG,X,COMMAND) std::cout << "--------------------------------------------------------------------\n" \
    << MSG << "\n"							\
    << #X << " = " << X << "\n"						\
    << #COMMAND << "\n";						\
  COMMAND;								\
  std::cout << #X << " = " << X << "\n"

// As EVAL, but additionally print Y after COMMAND has been executed
#define EVAL2(MSG,X,COMMAND,Y) std::cout << "--------------------------------------------------------------------\n" \
    << MSG << "\n"							\
    << #X << " = " << X << "\n"						\
    << #COMMAND << "\n";						\
  COMMAND;								\
  std::cout << #X << " = " << X << "\n"					\
	    << #Y << " = " << Y << "\n"

// Report a COMMAND that is not executed at all: only its text and a
// "does not compile" message are printed
#define EVAL_FAIL(MSG,X,COMMAND) std::cout << "--------------------------------------------------------------------\n" \
    << MSG << "\n"							\
    << #COMMAND << "\n"							\
    << "DOES NOT COMPILE (INCORRECT BEHAVIOUR)\n"

// Same output as EVAL_FAIL but with the argument list of EVAL2
#define EVAL2_FAIL(MSG,X,COMMAND,Y) std::cout << "--------------------------------------------------------------------\n" \
    << MSG << "\n"							\
    << #COMMAND << "\n"							\
    << "DOES NOT COMPILE (INCORRECT BEHAVIOUR)\n"

// Print the author's assessment of the behaviour just demonstrated
#define VERDICT98(MSG) std::cout << "Verdict for C++98: " << MSG << "\n"
#define VERDICT11(MSG) std::cout << "Verdict for C++11: " << MSG << "\n"

// Print a section heading
#define HEADING(MSG) std::cout << "####################################################################\n" \
    << MSG << "\n"

// Run through a sequence of construction, assignment, linking and
// argument-passing scenarios, printing the operands before and after
// each one.  Nothing is checked programmatically: the function always
// returns 0 and the output is intended to be read.
int main() {
  Vector v(2), w(2), v_data(2), v_const_data(2);
  v_data << 2, 3;
  v_const_data << 5, 7;
  v = v_data;
  const Vector v_const = v_const_data;

  adept::Stack stack;
  stack.new_recording();

  // The copy-constructor tests are in their own scope because one of
  // them declares a local "v_const" that hides the outer one
  {
    HEADING("COPY CONSTRUCTORS");
    EVAL2("Passing Vector as argument to Vector copy constructor", v, const Vector v2(v), v2);
    VERDICT98("correct");
    VERDICT11("should perform deep copy");
    EVAL2("Passing Vector as argument to const Vector copy constructor", v, const Vector v_const(v), v_const);
    VERDICT98("correct");
    VERDICT11("should perform deep copy");
    EVAL2("Passing const Vector as argument to const Vector copy constructor", v_const, const Vector v_const2(v_const), v_const2);
    VERDICT98("correct");
    VERDICT11("should perform deep copy");
    EVAL2("Passing const Vector as argument to Vector copy constructor", v_const, Vector v3(v_const), v3);
    VERDICT98("should not compile");
    VERDICT11("should perform deep copy");
  }

#ifdef ADEPT_CXX11_FEATURES
  HEADING("INITIALIZER LISTS");
  EVAL_CONSTRUCT("Construct Vector from initializer list of ints", v1, Vector v1 = {1 COMMA 2 COMMA 3});
  EVAL_CONSTRUCT("Construct Vector from initializer list of doubles", v1d, Vector v1d = {1.0 COMMA 2.0 COMMA 3.0});
  EVAL_CONSTRUCT("Construct Matrix from initializer list", M, Matrix M = { {1 COMMA 2} COMMA {3} } );
  EVAL_CONSTRUCT("Construct Array3D from initializer list", A3, Array3D A3 = { { {1 COMMA 2} COMMA {3} } COMMA { { 4 } } } );
  EVAL_CONSTRUCT("Construct FixedVector from initializer list", fv1, Vector3 fv1 = {1 COMMA 2});
  EVAL_CONSTRUCT("Construct FixedMatrix from initializer list", fM, Matrix33 fM = { {1 COMMA 2} COMMA {3} } );
  // NOTE(review): FixedArray appears without template arguments in
  // this copy of the file (presumably lost in extraction) - restore
  // them from the upstream source
  EVAL_CONSTRUCT("Construct FixedArray3D from initializer list", fA3, FixedArray fA3 = { { {1 COMMA 2} COMMA {3} } COMMA { { 4 } } } );
#endif

  HEADING("ASSIGNMENT OPERATOR");
  EVAL2("Passing Vector to assignment operator", v, w = v, w);
  EVAL2("Passing const Vector to assignment operator", v_const, w = v_const, w);
  EVAL2("Passing Vector rvalue to assignment operator", v, w = v(stride(1,0,-1)), w);
  EVAL2("Passing const-Vector rvalue to assignment operator", v_const, w = v_const(stride(1,0,-1)), w);
  EVAL2("Passing Expression to assignment operator", v, w = v+v, w);

  HEADING("PASSING Vector TO FUNCTIONS");
  EVAL2("Passing Vector as argument to function taking const Vector&", v, w = square(v), w);
  VERDICT98("too many copies");
  VERDICT11("could replace last copy with a move");
  EVAL("Passing Vector as argument to function taking Vector&", v, square_in_place(v));
  VERDICT98("correct");
  // Restore v after it was squared in place
  v = v_data;
  EVAL2("Passing Vector as argument to function taking Vector", v, w = square_copy(v), w);
  VERDICT98("too many copies, unexpected change of argument");
  VERDICT11("should do deep copy on input, replace last copy with a move");

  /*
  // Behaves same as passing non-const Vector, which is correct
  // Passing const Vector
  EVAL2("Passing const Vector as argument to function taking const Vector&", v_const, w = square(v_const), w);
  // The following should not compile:
  //  EVAL("Passing const Vector as argument to function taking Vector&",
  //       v_const, square_in_place(v_const));
  EVAL2("Passing const Vector as argument to function taking Vector", v_const, w = square_copy(v_const), w);
  */

  HEADING("LINKING");
  // w is cleared before each link so that ">>=" acts on an empty array
  w.clear();
  EVAL2("Linking to Vector", v, w >>= v, w);
  /*
  w.clear();
  // This should not compile
  EVAL2("Linking to const Vector", v_const, w >>= v_const, w);
  */
  w.clear();
  EVAL2("Linking to Vector rvalue", v, w >>= v(stride(1,0,-1)), w);
  /*
  // This should not compile
  w.clear();
  EVAL2("Linking to const-Vector rvalue", v_const, w >>= v_const(stride(1,0,-1)), w);
  */
  /*
  // This should not compile
  w.clear();
  EVAL2("Linking to Expression", v, w >>= v+v, w);
  VERDICT98("this doesn't make much sense");
  */

  // NOTE(review): this section repeats the "PASSING Vector TO
  // FUNCTIONS" tests above, but here w has not been cleared since
  // the last ">>=" statement - confirm whether the repetition is
  // intentional
  HEADING("PASSING Vector TO FUNCTIONS");
  EVAL2("Passing Vector as argument to function taking const Vector&", v, w = square(v), w);
  VERDICT98("too many copies");
  VERDICT11("could replace last copy with a move");
  EVAL("Passing Vector as argument to function taking Vector&", v, square_in_place(v));
  VERDICT98("correct");
  v = v_data;
  EVAL2("Passing Vector as argument to function taking Vector", v, w = square_copy(v), w);
  VERDICT98("too many copies, unexpected change of argument");
  VERDICT11("should do deep copy on input, replace last copy with a move");

  HEADING("PASSING Vector RVALUE TO FUNCTIONS");
  EVAL2("Passing Vector rvalue as argument to function taking const Vector&", v, w = square(v(stride(1,0,-1))), w);
  VERDICT98("correct");
  // EVAL_FAIL only prints the command; it is not compiled or run
  EVAL_FAIL("Passing Vector rvalue as argument to function taking Vector&", v, square_in_place(v(stride(1,0,-1))));
  VERDICT98("Vector subset functions could return references?");
  v = v_data;
  EVAL2("Passing Vector rvalue as argument to function taking Vector", v, w = square_copy(v(stride(1,0,-1))), w);
  VERDICT98("Vector subset functions could return references?");
  VERDICT11("Should use move function");

  HEADING("PASSING const Vector RVALUES TO FUNCTIONS");
  EVAL2("Passing const-Vector rvalue as argument to function taking const Vector&", v_const, w = square(v_const(stride(1,0,-1))), w);
  VERDICT98("correct");
  // This should not compile
  //  EVAL("Passing const-Vector rvalue as argument to function taking Vector&",
  //       v_const, square_in_place(v_const(stride(1,0,-1))));
  //  VERDICT98("Vector subset functions could return references?");
  EVAL2("Passing const-Vector rvalue as argument to function taking Vector", v_const, w = square_copy(v_const(stride(1,0,-1))), w);
  VERDICT98("correct");
  //  VERDICT11("Should use move function");

  HEADING("PASSING Expression TO FUNCTIONS");
  EVAL2("Passing Expression as argument to function taking const Vector&", v, w = square(v+v), w);
  VERDICT98("Unclear why copy-assignment + constructor needed");
  // This should not compile:
  //  EVAL("Passing Expression as argument to function taking Vector&",
  //       v, square_in_place(v+v));
  v = v_data;
  EVAL2("Passing Expression as argument to function taking Vector", v, w = square_copy(v+v), w);
  VERDICT98("Unclear why copy-assignment + constructor needed");

  return 0;
}

================================================
FILE: test/test_derivatives.cpp
================================================
/* test_derivatives.cpp - Test derivatives of
mathematical functions Copyright (C) 2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #include #define TEST_UNARY_FUNC(FUNC) \ { \ std::cout << " Checking " << #FUNC << "... \t"; \ aVector x = x_save; \ stack.new_recording(); \ aVector y = FUNC(x); \ Vector dy_dx_num = (FUNC(x_save+dx)-FUNC(x_save)) / dx; \ Vector dy_dx_adept(N); \ for (int i = 0; i < N; ++i) { \ x[i].set_gradient(1.0); \ stack.forward(); \ y[i].get_gradient(dy_dx_adept[i]); \ } \ Real max_err \ = maxval(abs(dy_dx_adept-dy_dx_num)); \ Real max_frac_err \ = maxval(abs(dy_dx_adept-dy_dx_num)/dy_dx_adept); \ if (max_err == 0) { \ std::cout << "max error = 0: PASSED\n"; \ } \ if (max_frac_err <= MAX_FRAC_ERR) { \ std::cout << "max fractional error = " << max_frac_err \ << ": PASSED\n"; \ } \ else { \ std::cout << "max fractional error = " \ << max_frac_err << ": FAILED\n"; \ std::cout << " Adept dy/dx = " \ << dy_dx_adept << "\n"; \ std::cout << " Numerical dy/dx = " << dy_dx_num << "\n"; \ error_too_large = true; \ } \ } #define TEST_BINARY_FUNC(FUNC) \ { \ std::cout << " Checking " << #FUNC << "... 
\t"; \ aVector x = x_save; \ aVector y = y_save; \ stack.new_recording(); \ aVector z = FUNC(x,y); \ Vector dz_dx_num \ = (FUNC(x_save+dx,y_save)-FUNC(x_save,y_save)) / dx; \ Vector dz_dy_num \ = (FUNC(x_save,y_save+dy)-FUNC(x_save,y_save)) / dy; \ Vector dz_dx_adept(N); \ Vector dz_dy_adept(N); \ for (int i = 0; i < N; ++i) { \ z[i].set_gradient(1.0); \ stack.reverse(); \ x[i].get_gradient(dz_dx_adept[i]); \ y[i].get_gradient(dz_dy_adept[i]); \ } \ Real max_err \ = std::max(maxval(abs(dz_dx_adept-dz_dx_num)), \ maxval(abs(dz_dy_adept-dz_dy_num))); \ Real max_frac_err \ = std::max(maxval(abs(dz_dx_adept-dz_dx_num)/dz_dx_adept), \ maxval(abs(dz_dy_adept-dz_dy_num)/dz_dy_adept)); \ if (max_err == 0) { \ std::cout << "max error = 0: PASSED\n"; \ } \ if (max_frac_err <= MAX_FRAC_ERR) { \ std::cout << "max fractional error = " << max_frac_err \ << ": PASSED\n"; \ } \ else { \ std::cout << "max fractional error = " \ << max_frac_err << ": FAILED\n"; \ std::cout << " Adept dz/dx = " << dz_dx_adept << "\n"; \ std::cout << " Adept dz/dy = " << dz_dy_adept << "\n"; \ std::cout << " Numerical dz/dx = " << dz_dx_num << "\n"; \ std::cout << " Numerical dz/dy = " << dz_dy_num << "\n"; \ error_too_large = true; \ } \ } int main(int argc, const char** argv) { using namespace adept; Stack stack; static const int N = 12; static const Real MAX_FRAC_ERR = 1.0e-5; Vector x_save(N); x_save = 0.2; x_save << 0.01, 0.4, 0.99; Vector y_save(N); y_save = 0.7; y_save << 0.9, 0.6, 0.1, -0.1; Real dx = 1.0e-8; if (sizeof(Real) < 8) { // Single precision only works with larger perturbations dx = 1.0e-5; } Real dy = dx; bool error_too_large = false; std::cout << "EVALUATING UNARY FUNCTIONS\n"; std::cout << "For functions of the form y=FUNC(x), where x=" << x_save << ",\n"; std::cout << "checking that fractional difference between dy/dx computed using Adept\n"; std::cout << "and numerically by perturbing x by " << dx << " is less than " << MAX_FRAC_ERR << ".\n"; TEST_UNARY_FUNC(-); // Unary minus 
TEST_UNARY_FUNC(+); // Unary plus TEST_UNARY_FUNC(log); TEST_UNARY_FUNC(log10); TEST_UNARY_FUNC(sin); TEST_UNARY_FUNC(cos); TEST_UNARY_FUNC(tan); TEST_UNARY_FUNC(asin); TEST_UNARY_FUNC(acos); TEST_UNARY_FUNC(atan); TEST_UNARY_FUNC(sinh); TEST_UNARY_FUNC(cosh); TEST_UNARY_FUNC(tanh); TEST_UNARY_FUNC(abs); TEST_UNARY_FUNC(fabs); TEST_UNARY_FUNC(exp); TEST_UNARY_FUNC(sqrt); TEST_UNARY_FUNC(ceil); TEST_UNARY_FUNC(floor); TEST_UNARY_FUNC(log2); TEST_UNARY_FUNC(expm1); TEST_UNARY_FUNC(exp2); TEST_UNARY_FUNC(log1p); TEST_UNARY_FUNC(asinh); TEST_UNARY_FUNC(acosh); TEST_UNARY_FUNC(atanh); TEST_UNARY_FUNC(erf); TEST_UNARY_FUNC(erfc); TEST_UNARY_FUNC(cbrt); TEST_UNARY_FUNC(round); TEST_UNARY_FUNC(trunc); TEST_UNARY_FUNC(rint); TEST_UNARY_FUNC(nearbyint); std::cout << "EVALUATING BINARY FUNCTIONS\n"; std::cout << "For functions of the form z=FUNC(x,y), where x=" << x_save << ",\n"; std::cout << "and y=" << y_save << ", checking that fractional difference between\n"; std::cout << "dz/dx and dz/dy computed using Adept and numerically by perturbing\n"; std::cout << "x and y by " << dx << " is less than " << MAX_FRAC_ERR << ".\n"; TEST_BINARY_FUNC(pow); TEST_BINARY_FUNC(atan2); TEST_BINARY_FUNC(max); TEST_BINARY_FUNC(min); TEST_BINARY_FUNC(fmax); TEST_BINARY_FUNC(fmin); TEST_BINARY_FUNC(copysign); if (error_too_large) { std::cerr << "*** Error: fractional error in the derivatives of some functions too large\n"; if (sizeof(Real) < 8) { std::cerr << "*** (but you are using less than double precision so it is not surprising)\n"; } return 1; } else { return 0; } } ================================================ FILE: test/test_fastexp.cpp ================================================ /* test_fastexp.cpp - Test Adept's fast exponential for correctness Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are 
preserved. This file is offered as-is, without any warranty. This file tests Adept's fast exponential function "fastexp", which is vectorizable. */ #include #include #include "adept_arrays.h" using namespace adept; int main(int argc, const char** argv) { { std::cout << "DOUBLE PRECISION\n"; std::cout << "Packet::size = " << internal::Packet::size << "\n"; Vector x = linspace(-750.0,750.0,128); x(end) = std::numeric_limits::quiet_NaN(); Vector exponential = exp(x); Vector fast_exponential = fastexp(x); Vector fractional_error = (fast_exponential - exponential) / exponential; // std::cout << fractional_error << "\n"; Matrix M(128,4); M(__,0) = x; M(__,1) = exponential; M(__,2) = fast_exponential; M(__,3) = fractional_error; std::cout << "x exp(x) fastexp(x) fractional-error"; std::cout << M << "\n"; } { std::cout << "\nSINGLE PRECISION\n"; std::cout << "Packet::size = " << internal::Packet::size << "\n"; floatVector x = linspace(-100.0,100.0,128); x(end) = std::numeric_limits::quiet_NaN(); floatVector exponential = exp(x); floatVector fast_exponential = fastexp(x); floatVector fractional_error = (fast_exponential - exponential) / exponential; floatMatrix M(128,4); M(__,0) = x; M(__,1) = exponential; M(__,2) = fast_exponential; M(__,3) = fractional_error; std::cout << "x exp(x) fastexp(x) fractional-error"; std::cout << M << "\n"; } return 0; } ================================================ FILE: test/test_fixed_arrays.cpp ================================================ /* test_arrays.cpp - Test Adept's array functionality Copyright (C) 2016-2017 European Centre for Medium-Range Weather Forecasts Author: Robin Hogan Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/

// NOTE(review): the header names of the three #include directives
// below are missing in this copy of the file (apparently lost in
// extraction) - restore them from the upstream source
#include

#define ADEPT_BOUNDS_CHECKING 1

#include
#include

// The following controls whether to use active variables or not
//#define ALL_ACTIVE 1
//#define MARVEL_STYLE 1

using namespace adept;

// Exercise a long list of array expressions on fixed-size arrays,
// printing the operands before and after each one.  Each expression
// is wrapped in one of the EVAL macros defined below, which count an
// "anomalous result" whenever the outcome (exception or no
// exception) differs from what the should_fail flag says is
// expected.  Returns 1 if there were any anomalous results and 0
// otherwise.  The command-line arguments are not used.
int
main(int argc, const char** argv)
{
  using namespace adept;
  Stack stack;

  // Print a section heading
#define HEADING(MESSAGE)						\
  std::cout << "====================================================================\n" \
	    << "   TESTING " << MESSAGE << "\n"

  // Declare a local X of type TYPE initialised from the member of
  // the same name in "test", print it, evaluate EXPR and print X
  // again.  An adept::exception thrown along the way is caught and
  // reported.  The outcome is anomalous if EXPR completed while
  // should_fail is true, or threw while should_fail is false.
#define EVAL(MESSAGE, TYPE, X, EXPR)					\
  std::cout << "--------------------------------------------------------------------\n" \
	    << "### " << MESSAGE << "\n### " << #EXPR << "\n";		\
  try {									\
    TYPE X;								\
    X = test. X;							\
    std::cout << #TYPE << " " << #X << " = " << X << "\n";		\
    std::cout << "Evaluating " << #EXPR << "\n";			\
    std::cout.flush();							\
    EXPR;								\
    std::cout << "Result: " << #X << " = " << X << "\n";		\
    if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n";		\
      anomalous_results++;						\
    }									\
  } catch (const adept::exception& e) {					\
    std::cout << "*** Failed with: " << e.what() << "\n";		\
    if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n";	\
      anomalous_results++;						\
    }									\
    else {								\
      std::cout << "*** Correct behaviour\n";				\
    }									\
  }

  // As EVAL, but with a second local variable Y that is also copied
  // from "test" and printed before EXPR is evaluated; only X is
  // printed afterwards
#define EVAL2(MESSAGE, TYPEX, X, TYPEY, Y, EXPR)			\
  std::cout << "--------------------------------------------------------------------\n" \
	    << "### " << MESSAGE << "\n### " << #EXPR << "\n";		\
  try {									\
    TYPEX X;								\
    X = test. X;							\
    std::cout << #TYPEX << " " << #X << " = " << X << "\n";		\
    TYPEY Y; Y = test. Y;						\
    std::cout << #TYPEY << " " << #Y << " = " << Y << "\n";		\
    std::cout << "Evaluating " << #EXPR << "\n";			\
    std::cout.flush();							\
    EXPR;								\
    std::cout << "Result: " << #X << " = " << X << "\n";		\
    if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n";		\
      anomalous_results++;						\
    }									\
  } catch (const adept::exception& e) {					\
    std::cout << "*** Failed with: " << e.what() << "\n";		\
    if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n";	\
      anomalous_results++;						\
    }									\
    else {								\
      std::cout << "*** Correct behaviour\n";				\
    }									\
  }

  // As EVAL2, but with a third local variable Z
#define EVAL3(MESSAGE, TYPEX, X, TYPEY, Y, TYPEZ, Z, EXPR)		\
  std::cout << "--------------------------------------------------------------------\n" \
	    << "### " << MESSAGE << "\n### " << #EXPR << "\n";		\
  try {									\
    TYPEX X;								\
    X = test. X;							\
    std::cout << #TYPEX << " " << #X << " = " << X << "\n";		\
    TYPEY Y; Y = test. Y;						\
    TYPEZ Z; Z = test. Z;						\
    std::cout << #TYPEY << " " << #Y << " = " << Y << "\n";		\
    std::cout << #TYPEZ << " " << #Z << " = " << Z << "\n";		\
    std::cout << "Evaluating " << #EXPR << "\n";			\
    std::cout.flush();							\
    EXPR;								\
    std::cout << "Result: " << #X << " = " << X << "\n";		\
    if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n";		\
      anomalous_results++;						\
    }									\
  } catch (const adept::exception& e) {					\
    std::cout << "*** Failed with: " << e.what() << "\n";		\
    if (!should_fail) { std::cout << "*** INCORRECT OUTCOME\n";	\
      anomalous_results++;						\
    }									\
    else {								\
      std::cout << "*** Correct behaviour\n";				\
    }									\
  }

  // As EVAL, but without the try/catch, so an exception thrown by
  // EXPR propagates out of main
#define EVAL_NO_TRAP(MESSAGE, TYPE, X, EXPR)				\
  std::cout << "--------------------------------------------------------------------\n" \
	    << "### " << MESSAGE << "\n### " << #EXPR << "\n";		\
  {									\
    TYPE X;								\
    X = test. X;							\
    std::cout << #TYPE << " " << #X << " = " << X << "\n";		\
    std::cout << "Evaluating " << #EXPR << "\n";			\
    std::cout.flush();							\
    EXPR;								\
    std::cout << "Result: " << #X << " = " << X << "\n";		\
    if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n";		\
      anomalous_results++;						\
    }									\
  }

  // As EVAL2, but without the try/catch
#define EVAL2_NO_TRAP(MESSAGE, TYPEX, X, TYPEY, Y, EXPR)		\
  std::cout << "--------------------------------------------------------------------\n" \
	    << "### " << MESSAGE << "\n### " << #EXPR << "\n";		\
  {									\
    TYPEX X;								\
    X = test. X;							\
    std::cout << #TYPEX << " " << #X << " = " << X << "\n";		\
    TYPEY Y; Y = test. Y;						\
    std::cout << #TYPEY << " " << #Y << " = " << Y << "\n";		\
    std::cout << "Evaluating " << #EXPR << "\n";			\
    std::cout.flush();							\
    EXPR;								\
    std::cout << "Result: " << #X << " = " << X << "\n";		\
    if (should_fail) { std::cout << "*** INCORRECT OUTCOME\n";		\
      anomalous_results++;						\
    }									\
  }

#ifdef ALL_ACTIVE
#define IS_ACTIVE true
#else
#define IS_ACTIVE false
#endif

  // NOTE(review): the template arguments of FixedArray are missing
  // from the typedefs below in this copy of the file (presumably
  // lost in extraction; the names suggest the dimensions and
  // IS_ACTIVE suggests the activeness) - restore them from the
  // upstream source
  typedef FixedArray myVector2;
  typedef FixedArray myVector3;
  typedef FixedArray myMatrix12;
  typedef FixedArray myMatrix33;
  typedef FixedArray myMatrix23;
  typedef FixedArray myMatrix32;
  typedef FixedArray myMatrix22;

  // Scalar and special-matrix types: inactive unless ALL_ACTIVE is
  // defined
#ifndef ALL_ACTIVE
  typedef Real myReal;
  typedef SymmMatrix mySymmMatrix;
  typedef DiagMatrix myDiagMatrix;
  typedef TridiagMatrix myTridiagMatrix;
  typedef LowerMatrix myLowerMatrix;
  typedef UpperMatrix myUpperMatrix;
#else
  typedef aReal myReal;
  typedef aSymmMatrix mySymmMatrix;
  typedef aDiagMatrix myDiagMatrix;
  typedef aTridiagMatrix myTridiagMatrix;
  typedef aLowerMatrix myLowerMatrix;
  typedef aUpperMatrix myUpperMatrix;
#endif

  //  typedef SpecialMatrix,false> mySymmMatrix;
  //  typedef SpecialMatrix,false> myDiagMatrix;
  //  typedef SpecialMatrix,false> myTridiagMatrix;

  // Holds the reference value of every variable used in the tests;
  // the EVAL macros copy from an instance of this struct so that
  // each test starts from the same data
  struct Test {
    myReal x;
    myVector2 z;
    myVector3 v, w;
    myMatrix12 K;
    myMatrix23 M, N;
    myMatrix33 S, C;
    myMatrix32 A;
    myMatrix22 B;
    mySymmMatrix O, P;
    myDiagMatrix D, E;
    myTridiagMatrix T, TT;
    myLowerMatrix L, LL;
    myUpperMatrix U, UU;
    intVector index;

    // Fill the members with the reference values
    Test() {
      x = -2;
      // Only the dynamically sized members need to be resized
      O.resize(3);
      //    Q.resize(5);
      index.resize(2);
      v(0) = 2; v(1) = 3; v(2) = 5;
      w(0) = 7; w(1) = 11; w(2) = 13;
      M(0,0) = 2; M(0,1) = 3; M(0,2) = 5;
      M(1,0) = 7; M(1,1) = 11; M(1,2) = 13;
      N(0,0) = 17; N(0,1) = 19; N(0,2) = 23;
      N(1,0) = 29; N(1,1) = 31; N(1,2) = 37;
      S(0,0) = 2; S(0,1) = 3; S(0,2) = 5;
      S(1,0) = 7; S(1,1) = 11; S(1,2) = 13;
      S(2,0) = 17; S(2,1) = 19; S(2,2) = 23;
      K << 57, 59;
      z << 37, 47;
      A << 21,22,23,24,25,26;
      B << 31,32,33,34;
      //      O = -M.T();
      O(0,0) = 7;
      O(1,0) = 2; O(1,1) = 11;
      O(2,0) = 3; O(2,1) = 5; O(2,2) = 13;
      /*
      P = 14-O;
      Q.diag_vector(-2) = 1;
      Q.diag_vector(-1) = 2;
      Q.diag_vector(0) = 3;
      Q.diag_vector(1) = 4;
      */
      C = 0;
      // The special matrices are initialised from the dense matrix S
      D = S;
      T = S;
      L = S;
      U = S;
      index << 1, 0;
    }
  };

  stack.new_recording();

  Test test;

  // Set to true around tests that are expected to throw
  bool should_fail=false;
  // Number of tests whose outcome did not match should_fail
  int anomalous_results=0;

#ifdef ALL_ACTIVE
  std::cout << "Testing ACTIVE arrays\n";
#else
  std::cout << "Testing INACTIVE arrays\n";
#endif

  HEADING("BASIC EXPRESSIONS");
  EVAL2("Vector assignment to vector", myVector3, v, myVector3, w, v = w);
  EVAL2("Vector assignment to expression", myVector3, v, myVector3, w, v = log(w) + 1.0);
  EVAL("Matrix *= operator", myMatrix23, M, M *= 0.5);
  EVAL2("Matrix = scalar", myMatrix23, M, myReal, x, M = x);
  EVAL2("Matrix = scalar expression", myMatrix23, M, myReal, x, M = (10.0*x));

  HEADING("BASIC FUNCTIONS");
  EVAL2("max", myVector3, v, myVector3, w, v = max(v,w/3.0));
  EVAL2("min", myVector3, v, myVector3, w, v = min(v,w/3.0));

  HEADING("ARRAY SLICING");
  EVAL2("Array indexing rvalue", myReal, x, myMatrix23, M, x = M(1,end-1));

  should_fail=true;
  EVAL2("Array indexing rvalue out of range (SHOULD FAIL)", myReal, x, myMatrix23, M, x = M(1,3));
  should_fail=false;

  EVAL("Array indexing lvalue", myMatrix23, M, M(1,end-1) *= -1.0);
  EVAL2("contiguous subarray rvalue", myVector3, v, myMatrix23, M, v = M(end,__));
  EVAL("contiguous subarray lvalue", myMatrix23, M, M(end-1,__) /= 2.0);
  EVAL2("contiguous subarray rvalue using range", myVector2, z, myMatrix23, M, z = 2.0 * M(1,range(1,2)));
  EVAL2("contiguous subarray lvalue using range", myMatrix23, M, myVector3, v, M(end-1,range(0,1)) = log(v(range(1,2))));
  EVAL2("contiguous subarray rvalue using subset", myMatrix12, K, myMatrix23, N, K = 2.0 * N.subset(1,1,1,2));
  EVAL("contiguous subarray lvalue using subset", myVector3, v, v.subset(end-1,end) *= 10.0);
  EVAL2("regular subarray rvalue", myVector3, v, myVector3, w, v = w(stride(end,0,-1)));
  EVAL2("regular subarray lvalue", myMatrix23, M, myVector3, w, M(0,stride(0,end,2)) *= w(stride(end,0,-2)));
  EVAL("irregular subarray rvalue", myMatrix23, M, M(stride(1,0,-1),find(M(0,__)>4)) = 0);
  EVAL("slice leading dimension", myMatrix23, M, M[end] = 0);
  EVAL("slice two dimensions", myMatrix23, M, M[end][0] = 0);
  EVAL2("diag_vector member function as rvalue", myVector2, z, myMatrix33, S, z = diag_vector(S,1));
  EVAL2("diag_vector member function as lvalue", myMatrix33, S, myVector3, v, S.diag_vector() += v);
  EVAL2("diag_matrix member function", myMatrix33, S, myVector3, v, S = v.diag_matrix());
  EVAL2("diag_matrix external function", myMatrix33, S, myVector3, v, S = diag_matrix(v));
  EVAL2("transpose as rvalue via T member function", myMatrix32, A, myMatrix23, M, A = 2 * M.T());
  EVAL2("transpose as rvalue via permute member function", myMatrix32, A, myMatrix23, M, A = 2 * M.permute(1,0));
  //  EVAL3("2D arbitrary index as rvalue", myMatrix22, B, myMatrix23, N, intVector, index, B = const_cast(N)(index,index));
  EVAL3("2D arbitrary index as rvalue", myMatrix22, B, myMatrix23, N, intVector, index, B = N(index,index));
  EVAL3("2D arbitrary index as lvalue", myMatrix23, M, myMatrix23, N, intVector, index, M(index,index) = N(__,range(1,2)));
  EVAL2("2D arbitrary index as lvalue with assign-multiply operator", myMatrix23, M, intVector, index, M(index,index) *= 10.0);
  EVAL2("2D arbitrary index as lvalue with aliased right-hand-side", myMatrix23, M, intVector, index, M(index,index) += M(__,range(1,2)));

  HEADING("REDUCTION OPERATIONS");
  EVAL2("full reduction", myReal, x, myMatrix23, M, x = sum(M));
  EVAL2("1-dimension reduction", myVector3, v, myMatrix23, M, v = 0.5 * mean(M,0));
  EVAL2("1-dimension reduction", myVector2, z, myMatrix23, M, z = norm2(M,1));
  EVAL2("maxval", myVector2, z, myMatrix23, M, z = maxval(M,1));
  EVAL2("minval", myVector2, z, myMatrix23, M, z = minval(M,1));
  EVAL2("dot product", myReal, x, myVector3, w, x = dot_product(w,w(stride(end,0,-1))));
  //  EVAL2("1D interpolation", myVector3, v, myVector3, w, (v = interp(value(v), w, value(w)/3.0) ));
  EVAL2("1D interpolation", myVector3, v, myVector3, w, v = interp(value(v), w, value(w)/2.0));
  EVAL2("1D clamped interpolation", myVector3, v, myVector3, w, v = interp(value(v), w, value(w)/2.0, ADEPT_EXTRAPOLATE_CLAMP));
  // The matrix interpolation tests are only compiled for inactive
  // arrays
#ifndef ALL_ACTIVE
  EVAL2("1D interpolation of matrix", myMatrix23, M, myVector3, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0));
  EVAL2("1D clamped interpolation of matrix", myMatrix23, M, myVector3, v, M = interp(v(range(0,1)), M, v(range(1,2))/2.0, ADEPT_EXTRAPOLATE_CLAMP));
#endif

  HEADING("CONDITIONAL OPERATIONS");
  EVAL2("where construct, scalar right-hand-side", myMatrix23, M, myMatrix23, N, M.where(N > 20) = 0);
  EVAL2("where construct, expression right-hand-side", myMatrix23, M, myMatrix23, N, M.where(N > 20) = -N);
  EVAL2("where construct, scalar either-or right-hand-side", myMatrix23, M, myMatrix23, N, M.where(N > 20) = either_or(0,1));
  EVAL2("where construct, expression either-or right-hand-side", myMatrix23, M, myMatrix23, N, M.where(N > 20) = either_or(-N,N));
  EVAL("find construct, scalar right-hand-side", myVector3, v, v(find(v > 3.5)) = 0);
  EVAL("find construct, expression right-hand-side", myVector3, v, v(find(v > 3.5)) = -v(range(end,end)));
  EVAL("find construct, multiply-assign right-hand-side", myVector3, v, v(find(v != 5.0)) *= 10.0);

  HEADING("SPECIAL SQUARE MATRICES");
  EVAL2("SymmMatrix assign from fixed matrix", mySymmMatrix, O, myMatrix33, S, O = S);
  EVAL2("DiagMatrix assign from dense matrix", myDiagMatrix, D, myMatrix33, S, D = S);
  EVAL2("TridiagMatrix assign from dense matrix", myTridiagMatrix, T, myMatrix33, S, T = S);
  EVAL2("LowerMatrix assign from dense matrix", myLowerMatrix, L, myMatrix33, S, L = S);
  EVAL2("UpperMatrix assign from dense matrix", myUpperMatrix, U, myMatrix33, S, U = S);
  EVAL2("SymmMatrix as rvalue", myMatrix33, S, mySymmMatrix, O, S = O);
  EVAL2("DiagMatrix as rvalue", myMatrix33, S, myDiagMatrix, D, S = D);
  EVAL2("TridiagMatrix as rvalue", myMatrix33, S, myTridiagMatrix, T, S = T);
  EVAL2("LowerMatrix as rvalue", myMatrix33, S, myLowerMatrix, L, S = L);
  EVAL2("UpperMatrix as rvalue", myMatrix33, S, myUpperMatrix, U, S = U);

  EVAL2("Array submatrix_on_diagonal member function", myMatrix22, B, myMatrix33, S, B = S.submatrix_on_diagonal(1,2));
  EVAL("Array submatrix_on_diagonal member function as lvalue", myMatrix33, S, S.submatrix_on_diagonal(0,1) = 0);
  should_fail = true;
  // NOTE(review): the local N is declared here as myMatrix33 whereas
  // Test::N is a myMatrix23, so the macro's "N = test.N" copies
  // between two different fixed-array types before the expression is
  // evaluated - confirm that the failure being trapped is the
  // intended "non-square matrix" one
  EVAL2("Array submatrix_on_diagonal member function to non-square matrix", myMatrix22, B, myMatrix33, N, B = N.submatrix_on_diagonal(1,2));
  should_fail = false;

#ifndef MARVEL_STYLE
  // Matrix multiplication tests are only run if Adept reports that
  // matrix multiplication is available
  if (adept::have_matrix_multiplication()) {
    HEADING("MATRIX MULTIPLICATION");
    EVAL2("Matrix-Matrix multiplication", myMatrix33, S, myMatrix23, M, S = M.T() ** M);
    EVAL2("Matrix-Matrix multiplication with matmul", myMatrix33, S, myMatrix23, M, S = matmul(M.T(), M));
    should_fail = true;
    EVAL2("Matrix-Matrix multiplication with inner dimension mismatch", myMatrix33, S, myMatrix23, M, S = M ** M);
    should_fail = false;
    // TESTING!
    EVAL2("Matrix-Matrix-Vector multiplication", myVector3, v, myMatrix33, S, v = S ** S ** v);
    EVAL2("Matrix-Matrix-Vector multiplication", myVector3, v, myMatrix33, S, v = S ** log(S) ** S(0,__));
    EVAL2("Vector-Matrix multiplication", myVector3, v, myMatrix33, S, v = v ** S);
    EVAL2("Vector-Matrix multiplication with matmul", myVector3, v, myMatrix33, S, v = matmul(v, S));
    EVAL2("SymmMatrix-Vector multiplication", myVector3, v, mySymmMatrix, O, v = O ** v);
    EVAL2("SymmMatrix-Matrix multiplication", myMatrix33, S, mySymmMatrix, O, S = O ** S);
    EVAL2("Vector-SymmMatrix multiplication", myVector3, v, mySymmMatrix, O, v = v ** O);
    EVAL2("Matrix-SymmMatrix multiplication", myMatrix23, M, mySymmMatrix, O, M = M ** O);
    EVAL2("DiagMatrix-Vector multiplication", myVector3, v, myDiagMatrix, D, v = D ** v);
    EVAL2("TridiagMatrix-Vector multiplication", myVector3, v, myTridiagMatrix, T, v = T ** v);
    EVAL2("TridiagMatrix-Matrix multiplication", myMatrix33, S, myTridiagMatrix, T, S = T ** S);
    EVAL2("Vector-TridiagMatrix multiplication", myVector3, v, myTridiagMatrix, T, v = v ** T);
    EVAL2("Matrix-TridiagMatrix multiplication", myMatrix23, M, myTridiagMatrix, T, M = M ** T);
  }
  else {
    std::cout << "NO MATRIX MULTIPLICATION TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\n";
  }

#ifndef ALL_ACTIVE
  // Linear algebra tests are only run if Adept reports that linear
  // algebra is available
  if (adept::have_linear_algebra()) {
    HEADING("LINEAR ALGEBRA");
    EVAL2("Solving general linear equations Ax=b", myVector3, v, myMatrix33, S, v = solve(S,v));
    EVAL2("Solving general linear equations AX=B", myMatrix23, M, myMatrix33, S, M.T() = solve(S,M.T()));
    EVAL2("Solving linear equations Ax=b with symmetric A", myVector3, v, mySymmMatrix, O, v = solve(O,v));
    EVAL2("Solving linear equations AX=B with symmetric A", myMatrix23, M, mySymmMatrix, O, M.T() = solve(O,M.T()));
    EVAL2("Invert general matrix", myMatrix33, C, myMatrix33, S, C = inv(S));
  }
  else {
    std::cout << "NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ADEPT COMPILED WITHOUT LAPACK\n";
  }
#else
  std::cout << "NO LINEAR ALGEBRA TESTS PERFORMED BECAUSE ACTIVE ARRAYS NOT YET SUPPORTED\n";
#endif

#else
  std::cout << "NO MATRIX TESTS PERFORMED BECAUSE USING MARVEL-STYLE ACTIVE ARRAYS\n";
#endif

  HEADING("FILLING ARRAYS");
  EVAL("Fill vector with \"<<\"", myVector3, v, (v << 0.1, 0.2));
  should_fail = true;
  EVAL("Overfill vector with \"<<\"", myVector3, v, (v << 0.1, 0.2, 0.3, 0.4));
  should_fail = false;
  EVAL("Underfill matrix with \"<<\"", myMatrix23, M, (M << 0.1, 0.2, 0.3, 0.4, 0.5));
  EVAL("Fill matrix with \"<<\"", myMatrix23, M, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6));
  should_fail = true;
  EVAL("Overfill matrix with \"<<\"", myMatrix23, M, (M << 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0));
  should_fail = false;
  EVAL("Fill vector with vectors using \"<<\"", myVector3, v, v << v(range(1,2)) << 0.1);
  EVAL2("Fill matrix with vector using \"<<\"", myMatrix23, M, myVector3, v, M << 0.1 << 0.2 << 0.3 << v);
  EVAL2("Fill matrix with vector using \"<<\"", myMatrix33, S, myVector3, v, S << v << v << v);
  EVAL("Assign array using range", myVector3, v, v = range(3,5));

#ifdef ADEPT_BOUNDS_CHECKING
  // Every access in this section is expected to throw
  HEADING("BOUNDS CHECKING");
  should_fail = true;
  EVAL("Access vector out of bounds", myVector3, v, v(0) = v(4));
  EVAL("Access vector out of bounds", myVector3, v, v(0) = v(end-4));
  EVAL("Access matrix out of bounds", myMatrix23, M, M(0,0) = M(0,-1));
  EVAL("Access matrix out of bounds", myMatrix23, M, M(0,0) = M(end+1,1));
  should_fail = false;
#endif

  std::cout << "====================================================================\n";
  if (anomalous_results > 0) {
    std::cout << "*** In terms of run-time errors, there were " << anomalous_results << " incorrect results\n";
    return 1;
  }
  else {
    std::cout << "In terms of run-time errors, all tests were passed\n";
    return 0;
  }
}

================================================
FILE: test/test_gsl_interface.cpp
================================================
/* test_gsl_interface.cpp - "main" function for Test 4

  Copyright (C) 2012-2014 The University of Reading

  Copying and
distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ // This program minimizes the N-dimensional Rosenbrock banana // function, with the number of dimensions optionally provided on the // command line #include #include #include #include "state.h" int main(int argc, char** argv) { std::cout << "Testing Adept-GSL interface using N-dimensional Rosenbrock function\n"; std::cout << "Usage: " << argv[0] << " [number_of_dimensions]\n"; // Read number of dimensions from the command line (default 2) int nx = 2; if (argc > 1) { nx = std::atoi(argv[1]); } if (nx < 2) { std::cout << "Error: must have 2 or more dimensions, but " << nx << " requested\n"; return 1; } // Create minimization environment (see state.h) and then minimize // the function; note that initial values are set on construction. State state(nx); state.minimize(); // Print out the result std::vector x; state.x(x); std::cout << "Final state: x = ["; for (int i = 0; i < nx; i++) { std::cout << " " << x[i]; } std::cout << "]\n"; return 0; } ================================================ FILE: test/test_interp.cpp ================================================ /* test_interp.cpp Copyright (C) 2024- European Centre for Medium-Range Weather Forecasts Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
This file tests interpolation operations */ #include #include "adept_arrays.h" using namespace adept; #define TEST_MULTI(FUNC) \ { \ std::cout << #FUNC << " ="; \ std::cout << FUNC << "\n"; \ } #define TEST(FUNC) \ { \ std::cout << FUNC << " \t = " << #FUNC << "\n"; \ } int main(int argc, const char** argv) { set_array_print_style(PRINT_STYLE_MATLAB); { std::cout << "*** 1D interpolation ***\n\n"; Vector x = {1.0, 4.0, 9.0}; Vector m = {2.0, 3.0, 5.0}; Vector xi = {4.0, 4.8, 3.0, 0.5, 10.0}; std::cout << "Coordinate vector and interpolation vector:\n"; std::cout << "x = " << x << "\n"; std::cout << "m = " << m << "\n"; std::cout << "xi = " << xi << "\n"; std::cout << "...which are:\n" << " (1) at a point in the interpolation vector,\n" << " (2) between points in the interpolation vector (closer to left),\n" << " (3) between points in the interpolation vector (closer to right),\n" << " (4) off the left of the interpolation vector, and\n" << " (5) off the right of the interpolation vector.\n\n"; TEST(interp(x,m,xi)); TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_CLAMP)); TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_CONSTANT)); TEST(interp(x,m,xi,ADEPT_EXTRAPOLATE_CONSTANT,-10.0)); TEST(interp(x(stride(end,0,-1)),m(stride(end,0,-1)),xi,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp(x+0.0,m+0.0,xi+0.0,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST)); TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP)); TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT)); TEST(interp(x,m,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT,-10.0)); TEST(interp(x(stride(end,0,-1)),m(stride(end,0,-1)),xi,ADEPT_INTERPOLATE_NEAREST)); Matrix M = spread<1>(m,2); std::cout << "\n*** Multiple 1D linear interpolation ***\n"; std::cout << "M = " << M << "\n"; TEST_MULTI(interp(x,M,xi)); TEST_MULTI(interp(x,M,xi,ADEPT_INTERPOLATE_NEAREST)); } { std::cout << "\n*** 2D linear interpolation ***\n\n"; int nx 
= 4; int ny = 3; Vector y = pow(linspace(1.0,ny,ny),2.0); Vector x = linspace(1.0,nx,nx); Matrix M = {{2.0,3.0,5.0,7.0}, {11.0,13.0,17.0,19.0}, {23.0,29.0,31.0,37.0}};//outer_product(y,x); Vector yi = {4.0, 2.0, 6.5, 0.5}; Vector xi = {2.0, 3.8, 0.5, 5.0}; std::cout << "Coordinate vectors and interpolation matrix:\n"; std::cout << "y = " << y << "\n"; std::cout << "x = " << x << "\n"; std::cout << "M = " << M << "\n"; std::cout << "\nTo be interpolated to the following points:\n"; std::cout << "yi = " << yi << "\n"; std::cout << "xi = " << xi << "\n"; std::cout << "...which are:\n" << " (1) at a point in the interpolation matrix,\n" << " (2) between points in the interpolation matrix,\n" << " (3) off the left of the matrix, and\n" << " (4) off the top-right of the matrix.\n\n"; TEST(interp2d(y,x,M,yi,xi)); TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_CLAMP)); TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT)); TEST(interp2d(y,x,M,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT,-10.0)); TEST(interp2d(y(stride(end,0,-1)),x,M(stride(end,0,-1),__),yi,xi)); TEST(interp2d(y+0.0,x+0.0,M+0.0,yi+0.0,xi+0.0)); TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST)); TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP)); TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT)); TEST(interp2d(y,x,M,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT,-10.0)); TEST(interp2d(y(stride(end,0,-1)),x,M(stride(end,0,-1),__),yi,xi,ADEPT_INTERPOLATE_NEAREST)); Array3D A = spread<2>(M,2); std::cout << "\n*** Multiple 2D linear interpolation ***\n"; std::cout << "A = " << A << "\n"; TEST_MULTI(interp2d(y,x,A,yi,xi)); } { std::cout << "\n*** 3D interpolation ***\n\n"; int nx = 4; int ny = 3; int nz = 2; Vector z = linspace(1.0,nz,nz); Vector y = linspace(1.0,ny,ny); Vector x = pow(linspace(1.0,nx,nx),2.0); Array3D A(nz,ny,nx); A(0,__,__) = outer_product(y,x); A(1,__,__) = outer_product(y,x)+1.0; 
Vector zi = {2.0, 1.2, 1.5, 5.0}; Vector yi = {2.0, 2.6, 0.5, 5.0}; Vector xi = {4.0, 10.0,20.0, 0.5}; std::cout << "Coordinate vectors and interpolation array:\n"; std::cout << "z = " << z << "\n"; std::cout << "y = " << y << "\n"; std::cout << "x = " << x << "\n"; std::cout << "A = " << A << "\n"; std::cout << "\nTo be interpolated to the following points:\n"; std::cout << "zi = " << zi << "\n"; std::cout << "yi = " << yi << "\n"; std::cout << "xi = " << xi << "\n"; std::cout << "...which are:\n" << " (1) at a point in the interpolation array,\n" << " (2) between points in the interpolation array,\n" << " (3) off the array in two dimension but not the third, and\n" << " (4) off all dimensions of the array.\n\n"; TEST(interp3d(z,y,x,A,zi,yi,xi)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_CLAMP)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_EXTRAPOLATE_CONSTANT,-10.0)); TEST(interp3d(z,y(stride(end,0,-1)),x,A(__,stride(end,0,-1),__),zi,yi,xi,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp3d(z+0.0,y+0.0,x+0.0,A+0.0,zi+0.0,yi+0.0,xi+0.0,ADEPT_EXTRAPOLATE_LINEAR)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CLAMP)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT)); TEST(interp3d(z,y,x,A,zi,yi,xi,ADEPT_INTERPOLATE_NEAREST|ADEPT_EXTRAPOLATE_CONSTANT,-10.0)); TEST(interp3d(z,y(stride(end,0,-1)),x,A(__,stride(end,0,-1),__),zi,yi,xi,ADEPT_INTERPOLATE_NEAREST)); } return 0; } ================================================ FILE: test/test_minimizer.cpp ================================================ /* test_minimizer.cpp - Test Adept minimizer with N-dimensional Rosenbrock function Copyright (C) 2020-2022 ECMWF Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the 
copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ //#include // for std::stoi in C++11 #include // for std::sscanf in C++98 #include #include // Set this to a large or small number to test if the minimization // algorithms are immune to the absolute scaling of the cost function #define COST_SCALING 1.0 using namespace adept; class RosenbrockN : public Optimizable { public: RosenbrockN() : ls_iteration_(0), exact_hessian_(false) {} int ls_iteration_; // Line search iteration // Do we use the exact Hessian derivate analytically, or the // approximate one from the Jacobian matrix and the Gauss-Newton // formula? bool exact_hessian_; // N-dimensional Rosenbrock function can be expressed as the sum of // the squared elements of vector y(x) defined as follows. This // form facilitates the calculation of the approximate Hessian from // the Jacobian dy/dx. It is templated so that can be called either // with a passive "Vector" or active "aVector" argument. 
template Array<1,Real,IsActive> calc_y(const Array<1,Real,IsActive>& x) { int nx = x.size(); Array<1,Real,IsActive> y((nx-1)*2); for (int ix = 0; ix < nx-1; ++ix) { y(ix*2) = 10.0 * (x(ix+1)-x(ix)*x(ix)); y(ix*2+1) = 1.0 - x(ix); } y *= sqrt(2.0 * COST_SCALING); return y; } void calc_exact_hessian(const adept::Vector& x, SymmMatrix& hessian) { hessian = 0.0; int nx = hessian.dimension(); for (int ix = 0; ix < nx-1; ++ix) { hessian(ix,ix) = 1200.0*x(ix)*x(ix) - 400.0*x(ix+1) + 2.0; hessian(ix,ix+1) = -400.0*x(ix); } for (int ix = 1; ix < nx; ++ix) { hessian(ix,ix) = hessian(ix,ix) + 200.0; } } virtual void report_progress(int niter, const adept::Vector& x, Real cost, Real gnorm) { ls_iteration_ = 0; std::cout << "Iteration " << niter << ": cost=" << cost << ", gnorm=" << gnorm << "\n"; } void state_to_stderr(const adept::Vector& x, Real cost) { // For plotting progress, direct standard error to a text file std::cerr << ls_iteration_ << " "; for (int ix = 0; ix < x.size(); ++ix) { std::cerr << x(ix) << " "; } std::cerr << cost << "\n"; ++ls_iteration_; } void final_state_to_stderr(const adept::Vector& x, Real cost) { ls_iteration_ = -1; state_to_stderr(x, cost); } virtual bool provides_derivative(int order) { if (order >= 0 && order <= 2) { return true; } else { return false; } } virtual Real calc_cost_function(const Vector& x) { //std::cout << " test x: " << x << "\n"; Vector y = calc_y(x); Real cost = 0.5*sum(y*y); state_to_stderr(x,cost); return cost; } virtual Real calc_cost_function_gradient(const Vector& x, Vector gradient) { Stack stack; aVector xactive = x; stack.new_recording(); aVector y = calc_y(xactive); aReal cost = 0.5*sum(y*y); cost.set_gradient(1.0); stack.reverse(); gradient = xactive.get_gradient(); state_to_stderr(x,value(cost)); return value(cost); } virtual Real calc_cost_function_gradient_hessian(const Vector& x, Vector gradient, SymmMatrix& hessian) { Stack stack; aVector xactive = x; stack.new_recording(); aVector y = calc_y(xactive); aReal 
cost = 0.5*sum(y*y); stack.independent(xactive); stack.dependent(y); Matrix jac = stack.jacobian(); if (exact_hessian_) { calc_exact_hessian(x, hessian); } else { hessian = jac.T() ** jac; } gradient = jac.T() ** value(y); state_to_stderr(x,value(cost)); return value(cost); } }; int main(int argc, const char* argv[]) { if (!adept::have_linear_algebra()) { std::cout << "Adept compiled without linear-algebra support: minimizer not available\n"; return 0; } RosenbrockN rosenbrock; Minimizer minimizer(MINIMIZER_ALGORITHM_LEVENBERG_MARQUARDT); // The convergence criterion should be changed in accordance with // the cost function scaling minimizer.set_converged_gradient_norm(0.1*COST_SCALING); int nx = 2; if (argc > 1) { // nx = std::stoi(argv[1]); std::sscanf(argv[1], "%d", &nx); if (argc > 2) { const char* algo_ptr = argv[2]; std::string algo(argv[2]); // If algorithm name is prefixed by "Newton-" then use the exact // Hessian matrix (analytically derived for this specific // function) rather than the Gauss-Newton approximation from the // Jacobian matrix if (algo.find("Newton-") == 0) { algo_ptr += 7; rosenbrock.exact_hessian_ = true; } minimizer.set_algorithm(algo_ptr); if (argc > 3) { int max_it; // max_it = std::stof(argv[3]); std::sscanf(argv[3], "%d", &max_it); minimizer.set_max_iterations(max_it); if (argc > 4) { double converged_grad_norm; //converged_grad_norm = std::stof(argv[4]); std::sscanf(argv[4], "%lf", &converged_grad_norm); minimizer.set_converged_gradient_norm(converged_grad_norm); } } } } else { std::cout << "Usage: " << argv[0] << " [nx] [Levenberg|Levenberg-Marquardt|Newton-Levenberg|Newton-Levenberg-Marquardt|L-BFGS|Conjugate-Gradient] [max_iterations] [converged_gradient_norm]\n"; } minimizer.set_levenberg_damping_start(0.25); //minimizer.set_max_step_size(1.0); // minimizer.set_levenberg_damping_multiplier(3.0, 5.0); minimizer.ensure_updated_state(2); std::cout << "Minimizing " << nx << "-dimensional Rosenbrock function\n"; std::cout << 
"Algorithm: " << minimizer.algorithm_name() << "\n"; std::cout << "Use exact Hessian: " << rosenbrock.exact_hessian_ << "\n"; std::cout << "Maximum iterations: " << minimizer.max_iterations() << "\n"; std::cout << "Converged gradient norm: " << minimizer.converged_gradient_norm() << "\n"; // Initial state vector Vector x(nx); // Standard start x = -3.0; // Trickier start (other end of the banana) //x = -3.0; x(1) = 3.0; // Near other minima in higher dimensions //x = 1.0; x(0) = -1.0; bool is_bounded = false; MinimizerStatus status; if (is_bounded) { // x = -3.0; x(1) = 3.0; x = -0.75; x(1) = 3.0; Vector x_lower, x_upper; adept::minimizer_initialize_bounds(nx, x_lower, x_upper); // x_upper(1) = 2.0; x_lower(1) = 0.2; x_lower(0) = -1; status = minimizer.minimize(rosenbrock, x, x_lower, x_upper); } else { status = minimizer.minimize(rosenbrock, x); } //rosenbrock.final_state_to_stderr(x, minimizer.cost_function()); std::cout << "Status: " << minimizer_status_string(status) << "\n"; std::cout << "Solution: x=" << x << "\n"; std::cout << "Number of samples: " << minimizer.n_samples() << "\n"; return static_cast(status); } ================================================ FILE: test/test_misc.cpp ================================================ /* test_misc.cpp Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #include "adept.h" #include "algorithm.h" // A straight implementation of the trivial example in Hogan (2014) double algorithm_ad(const double x_val[2], // Input values double* Y_ad, // Input-output adjoint double x_ad[2]) { // Output adjoint using namespace adept; // Import Stack and adouble from adept Stack stack; // Where differential information is stored adouble x[2] = {x_val[0], x_val[1]}; // Initialize adouble inputs stack.new_recording(); // Start recording derivatives adouble Y = algorithm(x); // Version overloaded for adouble args Y.set_gradient(*Y_ad); // Load the input-output adjoint stack.reverse(); // Run the adjoint algorithm x_ad[0] = x[0].get_gradient(); // Extract the output adjoint for x[0] x_ad[1] = x[1].get_gradient(); // ...and x[1] *Y_ad = Y.get_gradient(); // Input-output adjoint has changed too return Y.value(); // Return result of simple computation } int main() { double x[2] = {2.0, 3.0}; double y_ad = 1.0; double x_ad[2]; double y = algorithm_ad(x, &y_ad, x_ad); std::cout << "x[0] = " << x[0] << "\n" << "x[1] = " << x[1] << "\n" << "y = " << y << "\n" << "y_ad = " << y_ad << "\n" << "x_ad[0]=" << x_ad[0] << "\n" << "x_ad[1]=" << x_ad[1] << "\n"; return 0; } ================================================ FILE: test/test_no_lib.cpp ================================================ /* test_no_lib.cpp Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ // This source file contains everything that would normally be // compiled into a static or dynamic library; this means that the // executable can be built without needing to link to the // library. This is useful for non-Unix platforms where the configure // script cannot be run. 
Note that only one source file should // #include "adept_source.h"; all the others should #include "adept.h" // as normal. #include "adept_source.h" #include "algorithm.h" // A straight implementation of the trivial example in Hogan (2014) double algorithm_ad(const double x_val[2], // Input values double* Y_ad, // Input-output adjoint double x_ad[2]) { // Output adjoint using namespace adept; // Import Stack and adouble from adept Stack stack; // Where differential information is stored adouble x[2] = {x_val[0], x_val[1]}; // Initialize adouble inputs stack.new_recording(); // Start recording derivatives adouble Y = algorithm(x); // Version overloaded for adouble args Y.set_gradient(*Y_ad); // Load the input-output adjoint stack.reverse(); // Run the adjoint algorithm x_ad[0] = x[0].get_gradient(); // Extract the output adjoint for x[0] x_ad[1] = x[1].get_gradient(); // ...and x[1] *Y_ad = Y.get_gradient(); // Input-output adjoint has changed too return Y.value(); // Return result of simple computation } int main() { double x[2] = {2.0, 3.0}; double y_ad = 1.0; double x_ad[2]; double y = algorithm_ad(x, &y_ad, x_ad); std::cout << "x[0] = " << x[0] << "\n" << "x[1] = " << x[1] << "\n" << "y = " << y << "\n" << "y_ad = " << y_ad << "\n" << "x_ad[0]=" << x_ad[0] << "\n" << "x_ad[1]=" << x_ad[1] << "\n"; return 0; } ================================================ FILE: test/test_packet_operations.cpp ================================================ /* test_packet_operations.cpp Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. This file tests Adept's vectorization capabilities Adept vectors of types float and double, and also Packet and Packet that encapsulate the underlying intrinsic SIMD vector types. 
*/ #include #include "adept_arrays.h" using namespace adept; template Array<1,Type> p2v(internal::Packet p) { Array<1,Type> v(internal::Packet::size); p.put(v.data()); return v; } template void test_packet_operations() { static const int N = internal::Packet::size; std::cout << "\nADEPT PACKET\n"; std::cout << "Type: " << sizeof(Type) << "-byte floating point numbers\n"; std::cout << "Packet size: " << N << "\n"; Array<1,Type> v(N), w(N); v = range(1,N); w = 2.0; internal::Packet p(v.data()); internal::Packet q(w.data()); std::cout << "p = " << p2v(p) << "\n"; std::cout << "q = " << p2v(q) << "\n"; std::cout << "p+q = " << p2v(p+q) << "\n"; std::cout << "p-q = " << p2v(p-q) << "\n"; std::cout << "p*q = " << p2v(p*q) << "\n"; std::cout << "p/q = " << p2v(p/q) << "\n"; std::cout << "sqrt(p) = " << p2v(sqrt(p)) << "\n"; std::cout << "fmin(p,q) = " << p2v(fmin(p,q)) << "\n"; std::cout << "fmax(p,q) = " << p2v(fmax(p,q)) << "\n"; std::cout << "hsum(p) = " << hsum(p) << "\n"; std::cout << "hprod(p) = " << hprod(p) << "\n"; std::cout << "hmin(p) = " << hmin(p) << "\n"; std::cout << "hmax(p) = " << hmax(p) << "\n"; } template void test_vector_operations(int N) { std::cout << "\nADEPT ARRAY\n"; std::cout << "Type: " << sizeof(Type) << "-byte floating point numbers\n"; std::cout << "Packet size: " << internal::Packet::size << "\n"; Array<1,Type> v(N), w(N); v = range(1,N); w = 2.0; std::cout << "v = " << v << "\n"; std::cout << "w = " << w << "\n"; std::cout << "v+w = " << v+w << "\n"; std::cout << "v-w = " << v-w << "\n"; std::cout << "v*w = " << v*w << "\n"; std::cout << "v/w = " << v/w << "\n"; std::cout << "sqrt(v) = " << sqrt(v) << "\n"; std::cout << "fmin(v,w) = " << fmin(v,w) << "\n"; std::cout << "fmax(v,w) = " << fmax(v,w) << "\n"; std::cout << "sum(v) = " << sum(v) << "\n"; std::cout << "product(v) = " << product(v) << "\n"; std::cout << "minval(v) = " << minval(v) << "\n"; std::cout << "maxval(v) = " << maxval(v) << "\n"; } template void test_unaligned_reduce(int 
N) { std::cout << "\nUNALIGNED REDUCE\n"; std::cout << "Type: " << sizeof(Type) << "-byte floating point numbers\n"; std::cout << "Packet size: " << internal::Packet::size << "\n"; Array<1,Type> v(N); v = range(1,N); std::cout << "v = " << v << "\n"; std::cout << "sum(v(range(1,end-1))) = " << sum(v(range(1,end-1))) << "\n"; } template void test_unaligned_assign(int N) { std::cout << "\nUNALIGNED ASSIGN\n"; std::cout << "Type: " << sizeof(Type) << "-byte floating point numbers\n"; std::cout << "Packet size: " << internal::Packet::size << "\n"; Array<1,Type> v(N), w(N), x(N); v = range(1,N); w = 2.0; x = 0.0; std::cout << "v = " << v << "\n"; std::cout << "w = " << w << "\n"; std::cout << "x = " << x << "\n"; std::cout << "x(range(1,end-1)) = v(range(1,end-1))+w(range(1,end-1)) ->\n"; x(range(1,end-1)) = v(range(1,end-1))+w(range(1,end-1)); std::cout << "x = " << x << "\n"; } int main(int argc, const char** argv) { // Vectorization is only carried out on arrays of length twice the // packet length or longer static const int N = 2*internal::Packet::size; test_packet_operations(); test_packet_operations(); Packet d(2.0); Packet e = fastexp(d); std::cout << "e=" << e << "\n"; test_vector_operations(N); test_vector_operations(N); test_unaligned_reduce(2*N); test_unaligned_reduce(2*N); test_unaligned_assign(2*N); test_unaligned_assign(2*N); return 0; } ================================================ FILE: test/test_radiances.cpp ================================================ /* test_radiances.cpp - "main" function for Test 3 Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. 
*/ #include "adept.h" #include "simulate_radiances.h" using adept::Real; using adept::aReal; // This function provides an Adept interface to the simulate_radiances // function void simulate_radiances_wrapper(int n, const aReal& surface_temperature, const aReal* temperature, aReal radiance[2]) { // Create inactive (Real) versions of the active (aReal) inputs Real st = value(surface_temperature); std::vector t(n); for (int i = 0; i < n; ++i) t[i] = value(temperature[i]); // Declare variables to hold the inactive outputs and their Jacobians Real r[2]; Real dr_dst[2]; std::vector dr_dt(2*n); // Call the function with the non-Adept interface simulate_radiances(n, st, &t[0], &r[0], dr_dst, &dr_dt[0]); // Copy the results into the active variables, but use set_value in order // not to write any equivalent derivative statement to the Adept stack radiance[0].set_value(r[0]); radiance[1].set_value(r[1]); // Loop over the two radiances and add the derivative statements to // the Adept stack for (int i = 0; i < 2; ++i) { // Add the first term on the right-hand-side of Equation 1 in the text radiance[i].add_derivative_dependence(surface_temperature, dr_dst[i]); // Now append the second term on the right-hand-side of Equation // 1. The third argument "n" of the following function says that // there are n terms to be summed, and the fourth argument "2" // says to take only every second element of the Jacobian dr_dt, // since the derivatives with respect to the two radiances have // been interlaced. If the fourth argument is omitted then // relevant Jacobian elements will be assumed to be contiguous in // memory. 
radiance[i].append_derivative_dependence(temperature, &dr_dt[i], n, 2); } for (int i = 0; i < 2; ++i) { std::cout << "Channel " << i << "\n"; std::cout << "d[radiance]/d[surface_temperature] = " << dr_dst[i] << "\n"; std::cout << "d[radiance]/d[temperature] ="; for (int j = 0; j < n; ++j) { std::cout << " " << dr_dt[i+j*2]; } std::cout << "\n\n"; } } int main(int argc, char** argv) { // Temperature (K) at 1000-m intervals from the mid-latitude summer // standard atmosphere static const int N_POINTS = 25; static const Real temperature_profile[N_POINTS+1] = {294.0, 290.0, 285.0, 279.0, 273.0, 267.0, 261.0, 255.0, 248.0, 242.0, 235.0, 229.0, 222.0, 216.0, 216.0, 216.0, 216.0, 216.0, 216.0, 217.0, 218.0, 219.0, 220.0, 222.0, 223.0, 224.0}; // Start the Adept stack adept::Stack s; // Copy the temperature profile information into active variables aReal surface_temperature = temperature_profile[0]; aReal temperature[N_POINTS]; for (int i = 0; i < N_POINTS; i++) { temperature[i] = temperature_profile[i+1]; } // The simulated radiances will be put here... 
aReal sim_radiance[2]; // ...and compared to the observed radiances here with their 1-sigma // error Real obs_radiance[2] = {0.00189, 0.00140}; Real radiance_error = 2.0e-5; // Start recording derivative information s.new_recording(); // Simulate the radiances for the input surface temperature and // atmospheric temperature simulate_radiances_wrapper(N_POINTS, surface_temperature, temperature, sim_radiance); std::cout << "Simulated radiances = " << sim_radiance[0].value() << " " << sim_radiance[1].value() << "\n"; // Compute a "cost function" (or "penalty function") expressing the // sum of the squared number of error standard deviations the // simulated radiances are from the observed radiances aReal cost_function = 0.0; for (int ichan = 0; ichan < 2; ichan++) { cost_function += (sim_radiance[ichan] - obs_radiance[ichan]) * (sim_radiance[ichan] - obs_radiance[ichan]) / (radiance_error*radiance_error); } std::cout << "Cost function = " << cost_function << "\n"; // We want the computed adjoints to be gradients of the cost // function with respect to the surface temperature or atmospheric // temperature cost_function.set_gradient(1.0); // Reverse-mode automatic differentiation s.reverse(); // Extract the gradients Real dcost_dsurface_temperature = 0; Real dcost_dtemperature[N_POINTS]; surface_temperature.get_gradient(dcost_dsurface_temperature); adept::get_gradients(temperature, N_POINTS, dcost_dtemperature); std::cout << "d[cost_function]/d[surface_temperature] = " << dcost_dsurface_temperature << "\n"; std::cout << "d[cost_function]/d[temperature] ="; for (int i = 0; i < N_POINTS; i++) { std::cout << " " << dcost_dtemperature[i]; } std::cout << "\n"; } ================================================ FILE: test/test_radiances_array.cpp ================================================ /* test_radiances.cpp - "main" function for Test 3 Copyright (C) 2012-2014 The University of Reading Copyright (C) 2016 European Centre for Medium Range Weather Forecasts Copying and 
distribution of this file, with or without modification, are
  permitted in any medium without royalty provided the copyright
  notice and this notice are preserved.  This file is offered as-is,
  without any warranty.
*/

#include "adept_arrays.h"
#include "simulate_radiances.h"

using adept::Real;
using adept::aReal;
using adept::Vector;
using adept::aVector;
using adept::value;

// This function provides an Adept interface to the simulate_radiances
// function.
//
//   n                    number of elements of "temperature" to use
//   surface_temperature  active surface temperature (input)
//   temperature          active temperature profile (input)
//   radiance             the two active radiances (output)
//
// The passive simulate_radiances function is called with the values
// of the inputs, and the Jacobian elements it returns are then
// attached to the outputs by hand.  The Jacobians are also printed
// to standard output.
void simulate_radiances_wrapper(int n,
				const aReal& surface_temperature,
				const aVector& temperature,
				aReal radiance[2]) {
  // Create inactive (Real) versions of the active (aReal) inputs
  Real st = adept::value(surface_temperature);
  Vector t(n);
  for (int i = 0; i < n; i++) {
    t(i) = adept::value(temperature(i));
  }

  // Declare variables to hold the inactive outputs and their Jacobians
  Real r[2];
  Real dr_dst[2];
  Vector dr_dt(2*n);

  // Call the function with the non-Adept interface, passing pointers
  // to the first element of each array
  simulate_radiances(n, st, &t[0], &r[0], dr_dst, &dr_dt[0]);

  // Copy the results into the active variables, but use set_value in order
  // not to write any equivalent derivative statement to the Adept stack
  radiance[0].set_value(r[0]);
  radiance[1].set_value(r[1]);

  // Loop over the two radiances and add the derivative statements to
  // the Adept stack
  for (int i = 0; i < 2; ++i) {
    // Add the first term on the right-hand-side of Equation 1 in the text
    radiance[i].add_derivative_dependence(surface_temperature, dr_dst[i]);
    // Now append the second term on the right-hand-side of Equation
    // 1, one temperature element at a time.  The derivatives with
    // respect to the two radiances have been interlaced in dr_dt, so
    // the derivative of radiance i with respect to temperature j is
    // taken from element i+j*2.
    for (int j = 0; j < n; ++j) {
      radiance[i].append_derivative_dependence(temperature(j), dr_dt(i+j*2));
    }
  }

  // Print the Jacobian elements for each channel, using the same
  // interlaced indexing as above
  for (int i = 0; i < 2; ++i) {
    std::cout << "Channel " << i << "\n";
    std::cout << "d[radiance]/d[surface_temperature] = " << dr_dst[i] << "\n";
    std::cout << "d[radiance]/d[temperature] =";
    for (int j = 0; j < n; ++j) {
      std::cout << " " << dr_dt[i+j*2];
    }
    std::cout << "\n\n";
  }
}

// Entry point: simulate two radiances from a fixed temperature
// profile, form a cost function from their departure from the
// observed radiances, and print the gradient of the cost function
// with respect to the surface temperature and temperature profile.
int main(int argc, char** argv)
{
  // Temperature (K) at 1000-m intervals from the mid-latitude summer
  // standard atmosphere
  static const int N_POINTS = 25;
  static const Real temperature_profile[N_POINTS+1]
    = {294.0, 290.0, 285.0, 279.0, 273.0, 267.0, 261.0, 255.0,
       248.0, 242.0, 235.0, 229.0, 222.0, 216.0, 216.0, 216.0,
       216.0, 216.0, 216.0, 217.0, 218.0, 219.0, 220.0, 222.0,
       223.0, 224.0};

  // Start the Adept stack
  adept::Stack s;

  // Copy the temperature profile information into active variables:
  // element 0 is the surface temperature and the remaining N_POINTS
  // elements are the profile
  aReal surface_temperature = temperature_profile[0];
  aVector temperature(N_POINTS);
  for (int i = 0; i < N_POINTS; i++) {
    temperature[i] = temperature_profile[i+1];
  }

  // The simulated radiances will be put here...
  aReal sim_radiance[2];

  // ...and compared to the observed radiances here with their 1-sigma
  // error
  Real obs_radiance[2] = {0.00189, 0.00140};
  Real radiance_error = 2.0e-5;

  // Start recording derivative information
  s.new_recording();

  // Simulate the radiances for the input surface temperature and
  // atmospheric temperature
  simulate_radiances_wrapper(N_POINTS, surface_temperature,
			     temperature, sim_radiance);

  std::cout << "Simulated radiances = "
	    << sim_radiance[0].value() << " "
	    << sim_radiance[1].value() << "\n";

  // Compute a "cost function" (or "penalty function") expressing the
  // sum of the squared number of error standard deviations the
  // simulated radiances are from the observed radiances
  aReal cost_function = 0.0;
  for (int ichan = 0; ichan < 2; ichan++) {
    cost_function
      += (sim_radiance[ichan] - obs_radiance[ichan])
      *  (sim_radiance[ichan] - obs_radiance[ichan])
      / (radiance_error*radiance_error);
  }

  std::cout << "Cost function = " << cost_function << "\n";

  // We want the computed adjoints to be gradients of the cost
  // function with respect to the surface temperature or atmospheric
  // temperature
  cost_function.set_gradient(1.0);

  // Reverse-mode automatic differentiation
  s.reverse();

  // Extract the gradients
  Real dcost_dsurface_temperature = 0;
  Vector dcost_dtemperature;
  surface_temperature.get_gradient(dcost_dsurface_temperature);
  adept::get_gradients(temperature, dcost_dtemperature);

  std::cout << "d[cost_function]/d[surface_temperature] = "
	    << dcost_dsurface_temperature << "\n";
  std::cout << "d[cost_function]/d[temperature] =";
  for (int i = 0; i < N_POINTS; i++) {
    std::cout << " " << dcost_dtemperature[i];
  }
  std::cout << "\n";
}

================================================
FILE: test/test_reduce_active.cpp
================================================
/* test_reduce_active.cpp

  Copyright (C) 2020 European Centre for Medium-Range Weather Forecasts

  Copying and distribution of this file, with or without modification,
  are permitted in any medium without
royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. This file tests reduce operations on active vectors */ #include #include "adept_arrays.h" using namespace adept; #define TEST_REDUCE(FUNC) \ { \ std::cout << "\nTESTING REDUCE FUNCTION " \ << #FUNC << "\n"; \ stack.new_recording(); \ aReal J = FUNC(x); \ Real Jp = FUNC(value(x)); \ J.set_gradient(1.0); \ stack.reverse(); \ Vector dJdx = x.get_gradient(); \ std::cout << #FUNC << "(x) = " \ << J << "\n"; \ std::cout << #FUNC << "(value(x)) = " \ << Jp << "\n"; \ std::cout << "d(" << #FUNC << "(x))/dx = " \ << dJdx << "\n"; \ if (J != Jp) { ++status; } \ stack.print_statements(); \ } int main(int argc, const char** argv) { Stack stack; aVector x(5); x << -2.0, -3.0, -1.0, -50.0, 7.0; std::cout << "x = " << x << "\n"; int status = 0; TEST_REDUCE(sum); TEST_REDUCE(mean); TEST_REDUCE(maxval); TEST_REDUCE(minval); TEST_REDUCE(product); TEST_REDUCE(norm2); // Test product by hand { std::cout << "\nTESTING MANUAL PRODUCT\n"; stack.new_recording(); //aReal J = x(0)*x(1)*x(2)*x(3)*x(4); aReal J = x(0)*x(1); J *= x(2); J *= x(3); J *= x(4); J.set_gradient(1.0); stack.reverse(); Vector dJdx = x.get_gradient(); std::cout << "manual_product(x) = " << J << "\n"; std::cout << "d(manual_product(x))/x = " << dJdx << "\n"; stack.print_statements(); } // Test norm2 by hand { std::cout << "\nTESTING MANUAL NORM2\n"; stack.new_recording(); aReal J = sqrt(x(0)*x(0) + x(1)*x(1) + x(2)*x(2) + x(3)*x(3) + x(4)*x(4)); J.set_gradient(1.0); stack.reverse(); Vector dJdx = x.get_gradient(); std::cout << "manual_norm2(x) = " << J << "\n"; std::cout << "d(manual_norm2(x))/x = " << dJdx << "\n"; stack.print_statements(); } if (status != 0) { std::cout << "Error: " << status << " of the active/passive reduce operations are different\n"; } return status; } ================================================ FILE: test/test_thread_safe.cpp ================================================ 
/* test_thread_safe.cpp - Tests that Adept is thread-safe Copyright (C) 2012-2014 The University of Reading Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. This program tests the thread-safety of the Adept library: compile with and without ADEPT_STACK_THREAD_UNSAFE defined, and run with -serial and -parallel command-line arguments. It should crash only if ADEPT_STACK_THREAD_UNSAFE is defined AND -parallel is selected. */ #include #include #ifdef _OPENMP #include #endif // Test what happens if thread safety is disabled by uncommenting the // following //#define ADEPT_STACK_THREAD_UNSAFE 1 #include "adept.h" using adept::adouble; using adept::Real; // Number of points in spatial grid of simulation #define NX 128 // "Toon" advection scheme applied to linear advection in a 1D // periodic domain - see Adept paper for details static void toon(int nt, double c, const adouble q_init[NX], adouble q[NX]) { adouble flux[NX-1]; // Fluxes between boxes for (int i=0; i 1.0e-5) { std::cout << "*** ERROR: Jacobian from forward and reverse computations disagree (RMSD = " << rmsd << ")\n"; error_occurred = true; } else { std::cout << "CORRECT BEHAVIOUR: Jacobians from forward and reverse computations agree within tolerance\n"; } if (i == 0) { // Print information about the data held in the stack std::cout << "Stack status for iteration 0:\n" << s; // Print memory information std::cout << "Memory usage: " << s.memory() << " bytes\n\n"; } std::cout << "\n"; } return error_occurred; } int main(int argc, char** argv) { using adept::adouble; bool error_occurred = false; const double pi = 4.0*atan(1.0); // Edit these variables to change properties of simulation const int nt = 200; // Number of timesteps const double dt = 0.125; // Timestep (actually a Courant number) const int ncomputations = 8; // Initial values of 
field as a double array double q_init_save[NX]; bool is_parallel = true; if (argc > 1) { if (std::string("-serial") == argv[1]) { is_parallel = false; } else if (std::string("-parallel") == argv[1]) { is_parallel = true; } else { std::cout << "Usage: " << argv[0] << " [-serial|-parallel]\n"; return 1; } } std::cout << "Running " << argv[0] << "...\n"; #ifdef ADEPT_STACK_THREAD_UNSAFE std::cout << " Compiled to be THREAD UNSAFE\n"; #else std::cout << " Compiled to be THREAD SAFE\n"; #endif #ifdef _OPENMP std::cout << " " << omp_get_num_procs() << " processors available running maximum of " << omp_get_max_threads() << " threads\n"; if (is_parallel) { std::cout << " Performing " << ncomputations << " parallel computations,\n"; std::cout << " within which Jacobian (" << NX << "x" << NX << " matrix) calculations will be serial\n"; #ifdef ADEPT_STACK_THREAD_UNSAFE if (omp_get_max_threads() > 1) { std::cout << "*** You should expect this program to crash now!\n"; } #endif } else { std::cout << " Performing " << ncomputations << " serial computations,\n"; std::cout << " within which Jacobian (" << NX << "x" << NX << " matrix) calculations will be in parallel\n"; } #else std::cout << " Compiled with no OpenMP support\n"; #endif std::cout << "\n"; std::cout.flush(); // Initialize the field for (int i = 0; i < NX; i++) { q_init_save[i] = (0.5+0.5*sin((i*2.0*pi)/(NX-1.5)))+0.0001; } if (is_parallel) { #pragma omp parallel for for (int i = 0; i < ncomputations; i++) { if (compute(i, nt, dt, q_init_save)) { error_occurred = true; } } } else { for (int i = 0; i < ncomputations; i++) { if (compute(i, nt, dt, q_init_save)) { error_occurred = true; } } } if (error_occurred) { std::cout << "An error occurred\n"; } return error_occurred; } ================================================ FILE: test/test_thread_safe_arrays.cpp ================================================ /* test_thread_safe_arrays.cpp - Tests that Adept arrays are thread-safe Copyright (C) 2017 ECMWF Copying and 
distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. */ #ifdef _OPENMP #include #endif //#define ADEPT_STORAGE_THREAD_SAFE 1 #include int main(int argc, const char** argv) { using namespace adept; int N = 2; Matrix A(N,N); SymmMatrix S(N); Matrix B; SymmMatrix T; #ifdef ADEPT_STORAGE_THREAD_SAFE std::cout << "Storage should be thread safe\n"; // B shares the data and increases the reference counter of the // shared Storage object. If A goes out of scope, B will "steal" the // data. B >>= A; T >>= S; #else std::cout << "Storage is not thread safe: using soft_link()\n"; // B points to the data but does not have access to the Storage // object. If A goes out of scope, B will most likely point to an // inaccessible memory location. B >>= A.soft_link(); T >>= S.soft_link(); #endif A = 1.0; // Also seen by B S = 2.0; // Also seen by S int nthreads = 1; #ifdef _OPENMP nthreads = omp_get_max_threads(); std::cout << omp_get_num_procs() << " processors available running maximum of " << nthreads << " threads\n"; #else std::cout << "Compiled without OpenMP support: 1 thread\n"; #endif // The following almost always causes a crash if the code is not // properly thread safe #pragma omp parallel for for (int i = 0; i < N*1000; ++i) { for (int j = 0; j < N*1000; ++j) { B[j % N] = noalias(B(__, j % N)) + T.diag_vector(); } } if (nthreads > 1) { std::cout << "Parallel subsetting of array zillions of times was successful\n"; } else { std::cout << "Serial subsetting of array zillions of times was successful (unsurprisingly)\n"; } return 0; }