Repository: BlueBrain/CoreNeuron Branch: master Commit: 3a49f4b85a97 Files: 246 Total size: 1.3 MB Directory structure: gitextract_m67oskuu/ ├── .bbp-project.yaml ├── .clang-format.changes ├── .cmake-format.changes.yaml ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── config.yml │ │ └── feature_request.md │ ├── problem-matchers/ │ │ ├── address.json │ │ ├── gcc.json │ │ └── undefined.json │ ├── pull_request_template.md │ └── workflows/ │ ├── clang_cmake_format_check.yaml │ ├── coreneuron-ci.yml │ ├── coverage.yml │ └── test-as-submodule.yml ├── .gitignore ├── .gitlab-ci.yml ├── .gitmodules ├── .readthedocs.yml ├── .sanitizers/ │ └── undefined.supp ├── AUTHORS.txt ├── CMake/ │ ├── AddHpcCodingConvSubmodule.cmake │ ├── AddMod2cSubmodule.cmake │ ├── AddNmodlSubmodule.cmake │ ├── AddRandom123Submodule.cmake │ ├── CrayPortability.cmake │ ├── GitRevision.cmake │ ├── MakefileBuildOptions.cmake │ ├── OpenAccHelper.cmake │ ├── TestScriptUtils.cmake │ ├── config/ │ │ ├── CompilerFlagsHelpers.cmake │ │ ├── ReleaseDebugAutoFlags.cmake │ │ ├── SetRpath.cmake │ │ └── TestHelpers.cmake │ ├── coreneuron-config.cmake.in │ └── packages/ │ ├── FindSphinx.cmake │ ├── Findlikwid.cmake │ ├── Findnmodl.cmake │ └── Findreportinglib.cmake ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── coreneuron/ │ ├── CMakeLists.txt │ ├── apps/ │ │ ├── coreneuron.cpp │ │ ├── corenrn_parameters.cpp │ │ ├── corenrn_parameters.hpp │ │ └── main1.cpp │ ├── config/ │ │ ├── config.cpp.in │ │ ├── config.h │ │ ├── neuron_version.hpp.in │ │ └── version_macros.hpp │ ├── coreneuron.hpp │ ├── engine.h.in │ ├── gpu/ │ │ ├── nrn_acc_manager.cpp │ │ └── nrn_acc_manager.hpp │ ├── io/ │ │ ├── core2nrn_data_return.cpp │ │ ├── core2nrn_data_return.hpp │ │ ├── file_utils.cpp │ │ ├── file_utils.hpp │ │ ├── global_vars.cpp │ │ ├── lfp.cpp │ │ ├── lfp.hpp │ │ ├── mech_report.cpp │ │ ├── mech_report.h │ │ ├── mem_layout_util.cpp │ │ ├── mem_layout_util.hpp │ │ ├── mk_mech.cpp │ │ ├── nrn2core_data_init.cpp │ 
│ ├── nrn2core_direct.h │ │ ├── nrn_checkpoint.cpp │ │ ├── nrn_checkpoint.hpp │ │ ├── nrn_filehandler.cpp │ │ ├── nrn_filehandler.hpp │ │ ├── nrn_setup.cpp │ │ ├── nrn_setup.hpp │ │ ├── nrnsection_mapping.hpp │ │ ├── output_spikes.cpp │ │ ├── output_spikes.hpp │ │ ├── phase1.cpp │ │ ├── phase1.hpp │ │ ├── phase2.cpp │ │ ├── phase2.hpp │ │ ├── prcellstate.cpp │ │ ├── prcellstate.hpp │ │ ├── reports/ │ │ │ ├── binary_report_handler.cpp │ │ │ ├── binary_report_handler.hpp │ │ │ ├── nrnreport.cpp │ │ │ ├── nrnreport.hpp │ │ │ ├── report_configuration_parser.cpp │ │ │ ├── report_event.cpp │ │ │ ├── report_event.hpp │ │ │ ├── report_handler.cpp │ │ │ ├── report_handler.hpp │ │ │ ├── sonata_report_handler.cpp │ │ │ └── sonata_report_handler.hpp │ │ ├── setup_fornetcon.cpp │ │ ├── setup_fornetcon.hpp │ │ └── user_params.hpp │ ├── mechanism/ │ │ ├── capac.cpp │ │ ├── eion.cpp │ │ ├── eion.hpp │ │ ├── mech/ │ │ │ ├── cfile/ │ │ │ │ └── cabvars.h │ │ │ ├── enginemech.cpp │ │ │ ├── mod2c_core_thread.hpp │ │ │ ├── mod_func.c.pl │ │ │ └── modfile/ │ │ │ ├── exp2syn.mod │ │ │ ├── expsyn.mod │ │ │ ├── hh.mod │ │ │ ├── netstim.mod │ │ │ ├── passive.mod │ │ │ ├── pattern.mod │ │ │ ├── stim.mod │ │ │ └── svclmp.mod │ │ ├── mech_mapping.cpp │ │ ├── mech_mapping.hpp │ │ ├── mechanism.hpp │ │ ├── membfunc.hpp │ │ ├── patternstim.cpp │ │ ├── register_mech.cpp │ │ └── register_mech.hpp │ ├── membrane_definitions.h │ ├── mpi/ │ │ ├── core/ │ │ │ ├── nrnmpi.hpp │ │ │ ├── nrnmpi_def_cinc.cpp │ │ │ ├── nrnmpidec.cpp │ │ │ └── resolve.cpp │ │ ├── lib/ │ │ │ ├── mpispike.cpp │ │ │ ├── nrnmpi.cpp │ │ │ └── nrnmpi.hpp │ │ ├── nrnmpi.h │ │ ├── nrnmpidec.h │ │ └── nrnmpiuse.h │ ├── network/ │ │ ├── cvodestb.cpp │ │ ├── have2want.h │ │ ├── multisend.cpp │ │ ├── multisend.hpp │ │ ├── multisend_setup.cpp │ │ ├── netcon.hpp │ │ ├── netcvode.cpp │ │ ├── netcvode.hpp │ │ ├── netpar.cpp │ │ ├── netpar.hpp │ │ ├── partrans.cpp │ │ ├── partrans.hpp │ │ ├── partrans_setup.cpp │ │ ├── tnode.hpp │ │ ├── 
tqueue.cpp │ │ ├── tqueue.hpp │ │ └── tqueue.ipp │ ├── nrnconf.h │ ├── nrniv/ │ │ └── nrniv_decl.h │ ├── nrnoc/ │ │ ├── md1redef.h │ │ └── md2redef.h │ ├── permute/ │ │ ├── balance.cpp │ │ ├── cellorder.cpp │ │ ├── cellorder.cu │ │ ├── cellorder.hpp │ │ ├── cellorder1.cpp │ │ ├── cellorder2.cpp │ │ ├── data_layout.cpp │ │ ├── data_layout.hpp │ │ ├── node_permute.cpp │ │ └── node_permute.h │ ├── sim/ │ │ ├── fadvance_core.cpp │ │ ├── fast_imem.cpp │ │ ├── fast_imem.hpp │ │ ├── finitialize.cpp │ │ ├── multicore.cpp │ │ ├── multicore.hpp │ │ ├── scopmath/ │ │ │ ├── abort.cpp │ │ │ ├── crout_thread.hpp │ │ │ ├── errcodes.h │ │ │ ├── newton_struct.h │ │ │ ├── newton_thread.cpp │ │ │ ├── newton_thread.hpp │ │ │ ├── sparse_thread.hpp │ │ │ └── ssimplic_thread.hpp │ │ ├── solve_core.cpp │ │ └── treeset_core.cpp │ └── utils/ │ ├── ivocvect.cpp │ ├── ivocvect.hpp │ ├── lpt.cpp │ ├── lpt.hpp │ ├── memory.cpp │ ├── memory.h │ ├── memory_utils.cpp │ ├── memory_utils.h │ ├── nrn_assert.h │ ├── nrn_stats.cpp │ ├── nrn_stats.h │ ├── nrnmutdec.hpp │ ├── nrnoc_aux.cpp │ ├── nrnoc_aux.hpp │ ├── nrntimeout.cpp │ ├── offload.hpp │ ├── profile/ │ │ └── profiler_interface.h │ ├── progressbar/ │ │ ├── progressbar.cpp │ │ └── progressbar.hpp │ ├── randoms/ │ │ ├── nrnran123.cpp │ │ └── nrnran123.h │ ├── string_utils.cpp │ ├── string_utils.h │ ├── units.hpp │ ├── utils.cpp │ ├── utils.hpp │ ├── utils_cuda.h │ ├── vrecitem.h │ └── vrecord.cpp ├── docs/ │ ├── Doxyfile.in │ ├── DoxygenLayout.xml │ ├── README.md │ ├── _static/ │ │ └── custom.css │ ├── conda_environment.yml │ ├── conf.py │ ├── docs_requirements.txt │ ├── doxygen.rst │ ├── footer.html │ ├── index.rst │ └── userdoc/ │ ├── BinaryFormat/ │ │ └── BinaryFormat.md │ └── MemoryManagement/ │ └── bbcorepointer.md ├── extra/ │ ├── CMakeLists.txt │ ├── instrumentation.tau │ ├── nrnivmodl-core.in │ └── nrnivmodl_core_makefile.in └── tests/ ├── CMakeLists.txt ├── integration/ │ ├── CMakeLists.txt │ ├── README.md │ ├── integration_test.sh.in │ 
├── reportinglib/ │ │ ├── 1.check.in │ │ ├── 1.conf.in │ │ ├── 1.report │ │ ├── reporting_test.sh.in │ │ └── test_ref.out │ ├── ring/ │ │ └── out.dat.ref │ └── ring_gap/ │ ├── mod files/ │ │ └── halfgap.mod │ └── out.dat.ref └── unit/ ├── alignment/ │ ├── CMakeLists.txt │ └── alignment.cpp ├── cmdline_interface/ │ ├── CMakeLists.txt │ └── test_cmdline_interface.cpp ├── interleave_info/ │ ├── CMakeLists.txt │ └── check_constructors.cpp ├── lfp/ │ ├── CMakeLists.txt │ └── lfp.cpp ├── queueing/ │ ├── CMakeLists.txt │ └── test_queueing.cpp └── solver/ ├── CMakeLists.txt └── test_solver.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bbp-project.yaml ================================================ tools: ClangFormat: enable: True include: match: - coreneuron/.*\.((cu)|(h)|([chi]pp))$ CMakeFormat: enable: True ================================================ FILE: .clang-format.changes ================================================ IndentCaseLabels: true SortIncludes: false StatementMacros: [nrn_pragma_acc, nrn_pragma_omp] ================================================ FILE: .cmake-format.changes.yaml ================================================ additional_commands: cpp_cc_build_time_copy: flags: ['NO_TARGET'] kwargs: INPUT: '1' OUTPUT: '1' ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the issue** A clear and concise description of what the issue is. **To Reproduce** Steps to reproduce the behavior: ```bash A simple script ``` **Expected behavior** A clear and concise description of what you expected to happen. **Logs** If possible attach helpful logs related to the issue. 
If there is an issue during build, `CMakeError.log`, `CMakeOutput.log` or the output of `make VERBOSE=1` would be helpful. Otherwise any error printed to the terminal. **System (please complete the following information)** - OS: [e.g. Ubuntu 20.04] - Compiler: [e.g. PGI 20.9] - Version: [e.g. master branch] - Backend: [e.g. CPU] **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: true ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context about the feature request here. 
================================================ FILE: .github/problem-matchers/address.json ================================================ { "problemMatcher": [ { "owner": "asan-problem-matcher", "severity": "warning", "pattern": [ { "regexp": "^.*AddressSanitizer: (.*)$", "message": 1 } ] } ] } ================================================ FILE: .github/problem-matchers/gcc.json ================================================ { "__comment": "Taken from vscode-cpptools's Extension/package.json gcc rule", "problemMatcher": [ { "owner": "gcc-problem-matcher", "pattern": [ { "regexp": "^\\.\\./(.*):(\\d+):(\\d+):\\s+(?:fatal\\s+)?(warning|error):\\s+(.*)$", "file": 1, "line": 2, "column": 3, "severity": 4, "message": 5 } ] } ] } ================================================ FILE: .github/problem-matchers/undefined.json ================================================ { "problemMatcher": [ { "owner": "ubsan-problem-matcher", "severity": "warning", "pattern": [ { "regexp": "^.*\\/(src\\/.*):(\\d+):(\\d+): runtime error: (.*)$", "file": 1, "line": 2, "column": 3, "message": 4 }, { "regexp": "^.*UndefinedBehaviorSanitizer:.*$" } ] } ] } ================================================ FILE: .github/pull_request_template.md ================================================ **Description** Please include a summary of the change and which issue is fixed or which feature is added. - [ ] Issue 1 fixed - [ ] Issue 2 fixed - [ ] Feature 1 added - [ ] Feature 2 added Fixes # (issue) **How to test this?** Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce if there is no integration test added with this PR. Please also list any relevant details for your test configuration ```bash cmake .. make -j8 nrnivmodl mod ./bin/nrnivmodl-core mod ./x86_64/special script.py ./x86_64/special-core --tstop=10 --datpath=coredat ``` **Test System** - OS: [e.g. Ubuntu 20.04] - Compiler: [e.g. PGI 20.9] - Version: [e.g. 
master branch] - Backend: [e.g. CPU] **Use certain branches in CI pipelines.** CI_BRANCHES:NEURON_BRANCH=master,NMODL_BRANCH=master,SPACK_BRANCH=develop ================================================ FILE: .github/workflows/clang_cmake_format_check.yaml ================================================ name: clang-cmake-format-check concurrency: group: ${{ github.workflow }}#${{ github.ref }} cancel-in-progress: true on: push: jobs: build: name: clang-cmake-format-check runs-on: ubuntu-22.04 steps: - name: Fetch repository uses: actions/checkout@v3 - name: Fetch hpc-coding-conventions submodules shell: bash working-directory: ${{runner.workspace}}/CoreNeuron run: git submodule update --init --depth 1 -- CMake/hpc-coding-conventions - name: Run clang-format and cmake-format shell: bash working-directory: ${{runner.workspace}}/CoreNeuron run: CMake/hpc-coding-conventions/bin/format -v --dry-run ================================================ FILE: .github/workflows/coreneuron-ci.yml ================================================ name: CoreNEURON CI concurrency: group: ${{ github.workflow }}#${{ github.ref }} cancel-in-progress: true on: push: branches: - master - release/** pull_request: branches: - master - release/** env: BUILD_TYPE: Release DEFAULT_PY_VERSION: 3.8 MACOSX_DEPLOYMENT_TARGET: 11.0 jobs: ci: runs-on: ${{ matrix.os }} name: ${{ matrix.os }} - ${{ toJson(matrix.config) }}) env: SDK_ROOT: $(xcrun --sdk macosx --show-sdk-path) strategy: matrix: os: [ubuntu-20.04, macOS-11] config: # Defaults: CORENRN_ENABLE_MPI=ON - {cmake_option: "-DCORENRN_ENABLE_MPI_DYNAMIC=ON", flag_warnings: ON} - {cmake_option: "-DCORENRN_ENABLE_MPI_DYNAMIC=ON -DCORENRN_ENABLE_SHARED=OFF"} - {cmake_option: "-DCORENRN_ENABLE_MPI=OFF"} - {use_nmodl: ON, py_version: 3.7} - {use_nmodl: ON} include: - os: ubuntu-20.04 config: gcc_version: 10 - os: ubuntu-20.04 config: cmake_option: -DCORENRN_ENABLE_DEBUG_CODE=ON documentation: ON - os: ubuntu-22.04 config: sanitizer: address - os: 
ubuntu-22.04 config: flag_warnings: ON sanitizer: undefined fail-fast: false steps: - name: Install homebrew packages if: startsWith(matrix.os, 'macOS') run: | brew update brew install bison boost ccache coreutils flex ninja openmpi echo /usr/local/opt/flex/bin:/usr/local/opt/bison/bin >> $GITHUB_PATH shell: bash - name: Install apt packages if: startsWith(matrix.os, 'ubuntu') run: | sudo apt-get install bison ccache doxygen flex libboost-all-dev \ libfl-dev libopenmpi-dev ninja-build openmpi-bin shell: bash - name: Install specific apt packages if: startsWith(matrix.os, 'ubuntu') && matrix.config.gcc_version run: | sudo apt-get install gcc-${{matrix.config.gcc_version}} echo CC="gcc-${{matrix.config.gcc_version}}" >> $GITHUB_ENV echo CXX="g++-${{matrix.config.gcc_version}}" >> $GITHUB_ENV shell: bash - name: Set up Python3 uses: actions/setup-python@v4 with: python-version: ${{ env.PYTHON_VERSION }} env: PYTHON_VERSION: ${{matrix.config.py_version || env.DEFAULT_PY_VERSION}} - name: Install NMODL dependencies if: ${{ matrix.config.use_nmodl == 'ON' }} run: | python3 -m pip install --upgrade pip jinja2 pyyaml pytest sympy - uses: actions/checkout@v3 - name: Install documentation dependencies if: ${{matrix.config.documentation == 'ON'}} working-directory: ${{runner.workspace}}/CoreNeuron run: | sudo apt-get install doxygen python3 -m pip install --upgrade pip python3 -m pip install --upgrade -r docs/docs_requirements.txt - name: Register compiler warning problem matcher if: ${{matrix.config.flag_warnings == 'ON'}} run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Register sanitizer problem matcher if: ${{matrix.config.sanitizer}} run: echo "::add-matcher::.github/problem-matchers/${{matrix.config.sanitizer}}.json" - name: Hash config dictionary run: | cat << EOF > matrix.json ${{toJSON(matrix.config)}} EOF echo matrix.config JSON: cat matrix.json echo ----- # Workaround for https://github.com/actions/cache/issues/92 - name: Checkout cache action 
uses: actions/checkout@v3 with: repository: actions/cache ref: v3 path: tmp/actions/cache - name: Make actions/cache@v3 run even on failure run: | sed -i'.bak' -e '/ post-if: /d' tmp/actions/cache/action.yml - name: Restore compiler cache uses: ./tmp/actions/cache with: path: | ${{runner.workspace}}/ccache key: ${{matrix.os}}-${{hashfiles('matrix.json')}}-${{github.ref}}-${{github.sha}} restore-keys: | ${{matrix.os}}-${{hashfiles('matrix.json')}}-${{github.ref}}- ${{matrix.os}}-${{hashfiles('matrix.json')}}- - name: Build and Test id: build-test shell: bash working-directory: ${{runner.workspace}}/CoreNeuron run: | cmake_args=(${{matrix.config.cmake_option}}) if [[ "${{ startsWith(matrix.os, 'macOS') }}" = "true" ]]; then cmake_args+=(-DCORENRN_ENABLE_OPENMP=OFF) else cmake_args+=(-DCORENRN_ENABLE_OPENMP=ON) fi if [[ "${{matrix.config.flag_warnings}}" == "ON" ]]; then cmake_args+=(-DCORENRN_EXTRA_CXX_FLAGS="-Wall") fi if [[ -n "${{matrix.config.sanitizer}}" ]]; then CC=$(command -v clang-14) CXX=$(command -v clang++-14) symbolizer_path=$(realpath $(command -v llvm-symbolizer-14)) cmake_args+=(-DCMAKE_BUILD_TYPE=Custom \ -DCMAKE_C_FLAGS="-O1 -g -Wno-writable-strings" \ -DCMAKE_CXX_FLAGS="-O1 -g -Wno-writable-strings" \ -DLLVM_SYMBOLIZER_PATH="${symbolizer_path}" \ -DCORENRN_SANITIZERS=$(echo ${{matrix.config.sanitizer}} | sed -e 's/-/,/g')) else CC=${CC:-gcc} CXX=${CXX:-g++} fi echo "------- Build, Test and Install -------" mkdir build && cd build if [[ "$USE_NMODL" == "ON" ]]; then cmake_args+=(-DCORENRN_ENABLE_NMODL=ON "-DCORENRN_NMODL_FLAGS=sympy --analytic") fi cmake .. 
-G Ninja "${cmake_args[@]}" \ -DCMAKE_C_COMPILER="${CC}" \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DCMAKE_CXX_COMPILER="${CXX}" \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ "-DCMAKE_INSTALL_PREFIX=${{runner.workspace}}/install" \ -DPYTHON_EXECUTABLE=$(command -v python3) if ccache --version | grep -E '^ccache version 4\.(4|4\.1)$' then echo "------- Disable ccache direct mode -------" # https://github.com/ccache/ccache/issues/935 export CCACHE_NODIRECT=1 fi ccache -z # Older versions don't support -v (verbose) ccache -vs 2>/dev/null || ccache -s cmake --build . --parallel ccache -vs 2>/dev/null || ccache -s ctest -T Test --output-on-failure cmake --build . --target install env: CCACHE_BASEDIR: ${{runner.workspace}}/CoreNeuron CCACHE_DIR: ${{runner.workspace}}/ccache USE_NMODL: ${{matrix.config.use_nmodl}} - uses: actions/upload-artifact@v3 with: name: ctest-results-${{hashfiles('matrix.json')}}-sanitizer path: ${{runner.workspace}}/CoreNeuron/build/Testing/*/Test.xml # This step will set up an SSH connection on tmate.io for live debugging. # To enable it, you have to: # * add 'live-debug-ci' to your PR title # * push something to your PR branch (note that just re-running the pipeline disregards the title update) - name: live debug session on failure (manual steps required, check `.github/workflows/coreneuron-ci.yml`) if: failure() && contains(github.event.pull_request.title, 'live-debug-ci') uses: mxschmitt/action-tmate@v3 - name: Documentation if: ${{ startsWith(matrix.os, 'ubuntu') && matrix.config.documentation == 'ON' }} id: documentation working-directory: ${{runner.workspace}}/CoreNeuron/build run: | echo "------- Build Doxygen Documentation -------"; cmake --build . 
--target docs echo "-------- Disable jekyll --------"; pushd docs; touch .nojekyll; echo ::set-output name=status::done - name: Deploy 🚀 uses: JamesIves/github-pages-deploy-action@v4 if: steps.documentation.outputs.status == 'done' && github.ref == 'refs/heads/master' with: branch: gh-pages # The branch the action should deploy to. folder: ${{runner.workspace}}/CoreNeuron/build/docs # The folder the action should deploy. single-commit: true #have a single commit on the deployment branch instead of maintaining the full history ================================================ FILE: .github/workflows/coverage.yml ================================================ name: Coverage concurrency: group: ${{ github.workflow }}#${{ github.ref }} cancel-in-progress: true on: push: branches: - master - release/** pull_request: branches: - master - release/** env: CMAKE_BUILD_PARALLEL_LEVEL: 3 jobs: coverage: runs-on: ubuntu-20.04 name: "Coverage Test" steps: - name: Install packages run: | sudo apt-get update sudo apt-get install bison doxygen flex lcov libboost-all-dev \ libopenmpi-dev libfl-dev ninja-build openmpi-bin python3-dev \ python3-pip shell: bash - uses: actions/checkout@v3 with: fetch-depth: 2 - name: Build and Test for Coverage id: build-test shell: bash working-directory: ${{runner.workspace}}/CoreNeuron run: | mkdir build && cd build cmake .. -G Ninja \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_C_FLAGS="-coverage" \ -DCMAKE_CXX_FLAGS="-coverage" \ -DCORENRN_ENABLE_MPI=ON \ -DCORENRN_ENABLE_DEBUG_CODE=ON cmake --build . (cd ..; lcov --capture --initial --directory . --no-external --output-file build/coverage-base.info) ctest --output-on-failure (cd ..; lcov --capture --directory . 
--no-external --output-file build/coverage-run.info) lcov --add-tracefile coverage-base.info --add-tracefile coverage-run.info --output-file coverage-combined.info lcov --remove coverage-combined.info --output-file coverage.info "*/external/*" lcov --list coverage.info - name: Upload to codecov.io run: | # Download codecov script and perform integrity checks curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --import # One-time step curl -Os https://uploader.codecov.io/latest/linux/codecov curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM.sig gpg --verify codecov.SHA256SUM.sig codecov.SHA256SUM shasum -a 256 -c codecov.SHA256SUM chmod +x codecov ./codecov -f build/coverage.info ================================================ FILE: .github/workflows/test-as-submodule.yml ================================================ name: NEURON submodule concurrency: group: ${{ github.workflow }}#${{ github.ref }} cancel-in-progress: true on: push: branches: - master - release/** pull_request: branches: - master - release/** jobs: ci: name: ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: include: - os: ubuntu-20.04 cores: 2 - os: macOS-11 cores: 3 fail-fast: false env: CMAKE_BUILD_PARALLEL_LEVEL: ${{matrix.cores}} SDK_ROOT: $(xcrun --sdk macosx --show-sdk-path) steps: - name: Install homebrew packages if: startsWith(matrix.os, 'macOS') run: | brew install bison coreutils flex ninja openmpi python3 -m pip install --upgrade numpy pytest pytest-cov echo /usr/local/opt/flex/bin:/usr/local/opt/bison/bin >> $GITHUB_PATH echo "CC=gcc" >> $GITHUB_ENV echo "CXX=g++" >> $GITHUB_ENV - name: Install apt packages if: startsWith(matrix.os, 'ubuntu') run: | sudo apt-get update sudo apt-get install bison cython3 flex libfl-dev libopenmpi-dev \ ninja-build openmpi-bin python3-dev python3 -m pip install --upgrade numpy pytest pytest-cov echo "CC=gcc" >> $GITHUB_ENV echo "CXX=g++" >> 
$GITHUB_ENV - name: Set NEURON branch id: vars env: GITHUB_PR_BODY: ${{ github.event.pull_request.body }} run: | nrn_branch=$(echo "${GITHUB_PR_BODY}" | grep "^CI_BRANCHES" \ | awk -F '[:,]{1}NEURON_BRANCH=' '{print $2}' \ | awk -F ',' '{print $1}') if [ -z "$nrn_branch" ]; then nrn_branch=master fi echo "Will use neuron branch: $nrn_branch" echo ::set-output name=neuron_branch::"${nrn_branch}" - uses: actions/checkout@v3 name: Checkout NEURON with: path: nrn repository: neuronsimulator/nrn ref: ${{ steps.vars.outputs.neuron_branch }} - name: Update CoreNEURON submodule run: | cd ${GITHUB_WORKSPACE}/nrn coreneuron_sha=${{github.event.pull_request.head.sha}} if [[ -z ${coreneuron_sha} ]]; then # presumably we're running on a push event coreneuron_sha=${{github.sha}} fi echo "Using CoreNEURON SHA ${coreneuron_sha}" # https://stackoverflow.com/a/33575837 git update-index --cacheinfo 160000,${coreneuron_sha},external/coreneuron git submodule update --init external/coreneuron echo "NEURON status" git status git log -n 1 cd external/coreneuron echo "CoreNEURON status" git status git log -n 1 - name: Configure NEURON run: | cd ${GITHUB_WORKSPACE}/nrn mkdir build install cd build # NEURON CMake assumes this is defined. export SHELL=$(command -v bash) openMP=" -DCORENRN_ENABLE_OPENMP=ON" if [[ "${{ startsWith(matrix.os, 'macOS') }}" = "true" ]]; then openMP=" -DCORENRN_ENABLE_OPENMP=OFF" fi cmake .. -G Ninja \ -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_INSTALL_PREFIX=../install \ -DPYTHON_EXECUTABLE=$(command -v python3) \ -DNRN_ENABLE_CORENEURON=ON \ -DNRN_ENABLE_INTERVIEWS=OFF \ -DNRN_ENABLE_RX3D=OFF \ -DNRN_ENABLE_MPI_DYNAMIC=ON \ -DNRN_ENABLE_TESTS=ON ${openMP} - name: Build NEURON run: | cd ${GITHUB_WORKSPACE}/nrn/build cmake --build . --parallel - name: Test NEURON run: | cd ${GITHUB_WORKSPACE}/nrn/build ctest --output-on-failure - name: Install NEURON run: | cd ${GITHUB_WORKSPACE}/nrn/build cmake --build . 
--target install # This step will set up an SSH connection on tmate.io for live debugging. # To enable it, you have to: # * add 'live-debug-ci' to your PR title # * push something to your PR branch (note that just re-running the pipeline disregards the title update) - name: live debug session on failure (manual steps required, check `.github/workflows/test-as-submodule.yml`) if: failure() && contains(github.event.pull_request.title, 'live-debug-ci') uses: mxschmitt/action-tmate@v3 ================================================ FILE: .gitignore ================================================ cmake-build-debug* *build* spconfig.* *~ .DS_Store *.swp *.srctrl* # HPC coding conventions .clang-format .clang-tidy .cmake-format.yaml .pre-commit-config.yaml .bbp-project-venv/ ================================================ FILE: .gitlab-ci.yml ================================================ include: - project: hpc/gitlab-pipelines file: - spack-build-components.gitlab-ci.yml - github-project-pipelines.gitlab-ci.yml ref: '$GITLAB_PIPELINES_BRANCH' - project: hpc/gitlab-upload-logs file: enable-upload.yml variables: NEURON_BRANCH: description: Branch of NEURON to build against CoreNEURON (NEURON_COMMIT and NEURON_TAG also possible) value: master NMODL_BRANCH: description: Branch of NMODL to build CoreNEURON against (NMODL_COMMIT and NMODL_TAG also possible) value: master SPACK_BRANCH: description: Branch of BlueBrain Spack to use for the CI pipeline value: develop SPACK_DEPLOYMENT_SUFFIX: description: Extra path component used when finding deployed software. Set to something like `pulls/1497` to use software built for https://github.com/BlueBrain/spack/pull/1497. You probably want to set SPACK_BRANCH to the branch used in the relevant PR if you set this. 
value: '' # Set up Spack spack_setup: extends: .spack_setup_ccache variables: CORENEURON_COMMIT: ${CI_COMMIT_SHA} # Enable fetching GitHub PR descriptions and parsing them to find out what # branches to build of other projects. PARSE_GITHUB_PR_DESCRIPTIONS: "true" simulation_stack: stage: .pre # Take advantage of GitHub PR description parsing in the spack_setup job. needs: [spack_setup] trigger: project: hpc/sim/blueconfigs # CoreNEURON CI status depends on the BlueConfigs CI status. strategy: depend variables: GITLAB_PIPELINES_BRANCH: $GITLAB_PIPELINES_BRANCH SPACK_ENV_FILE_URL: $SPACK_SETUP_COMMIT_MAPPING_URL # Performance seems to be terrible when we get too many jobs on a single node. .build: extends: [.spack_build] variables: bb5_ntasks: 2 # so we block 16 cores bb5_cpus_per_task: 8 # ninja -j {this} bb5_memory: 76G # ~16*384/80 .spack_intel: variables: SPACK_PACKAGE_COMPILER: intel .spack_nvhpc: variables: SPACK_PACKAGE_COMPILER: nvhpc .build_neuron: extends: [.build] timeout: two hours variables: bb5_duration: "2:00:00" SPACK_PACKAGE: neuron SPACK_PACKAGE_SPEC: +coreneuron+debug+tests~legacy-unit~rx3d model_tests=channel-benchmark,olfactory,tqperf-heavy .gpu_node: variables: bb5_constraint: volta bb5_cpus_per_task: 2 .test_neuron: extends: [.ctest] variables: bb5_ntasks: 16 bb5_memory: 76G # ~16*384/80 # Build NMODL once with GCC build:nmodl: extends: [.build] variables: SPACK_PACKAGE: nmodl SPACK_PACKAGE_SPEC: ~legacy-unit SPACK_PACKAGE_COMPILER: gcc # Build CoreNEURON .build_coreneuron: extends: [.build] variables: SPACK_PACKAGE: coreneuron # NEURON depends on py-mpi4py, most of whose dependencies are pulled in by # nmodl%gcc, with the exception of MPI, which is pulled in by # coreneuron%{nvhpc,intel}. hpe-mpi is an external package anyway, so # setting its compiler is just changing how it is labelled in the # dependency graph and not changing which installation is used, but this # means that in the NEURON step an existing py-mpi4py%gcc can be used. 
# Otherwise a new py-mpi4py with hpe-mpi%{nvhpc,intel} will be built. # caliper: papi%nvhpc does not build; use the caliper from the deployment # TODO: fix this more robustly so we don't have to play so many games. SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc ^caliper%gcc+cuda cuda_arch=70 # TODO: improve coverage by switching an Intel build to be statically linked # TODO: improve coverage by switching an Intel build to RelWithDebInfo # TODO: improve coverage by enabling +openmp on an Intel build build:coreneuron:mod2c:intel:shared:debug: extends: [.build_coreneuron, .spack_intel] variables: SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=Debug build:coreneuron:nmodl:intel:debug:legacy: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: SPACK_PACKAGE_SPEC: +caliper~gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug # Disable caliper to improve coverage build:coreneuron:nmodl:intel:shared:debug: extends: [.build_coreneuron, .spack_intel] needs: ["build:nmodl"] variables: SPACK_PACKAGE_DEPENDENCIES: ^hpe-mpi%gcc SPACK_PACKAGE_SPEC: ~caliper~gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=Debug # Not linked to a NEURON build+test job, see # https://github.com/BlueBrain/CoreNeuron/issues/594 build:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.build_coreneuron, .spack_nvhpc] variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl+openmp~shared+tests+unified build_type=Debug # Shared + OpenACC + OpenMP host threading has problems build:coreneuron:mod2c:nvhpc:acc:shared: extends: [.build_coreneuron, .spack_nvhpc] variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit~nmodl~openmp+shared+tests~unified build_type=RelWithDebInfo build:coreneuron:nmodl:nvhpc:acc:debug:legacy: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp~shared~sympy+tests~unified build_type=Debug 
build:coreneuron:nmodl:nvhpc:acc:shared: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl~openmp+shared+sympy+tests~unified build_type=RelWithDebInfo build:coreneuron:nmodl:nvhpc:omp:legacy: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared~sympy+tests~unified build_type=RelWithDebInfo build:coreneuron:nmodl:nvhpc:omp:debug: extends: [.build_coreneuron, .spack_nvhpc] needs: ["build:nmodl"] variables: SPACK_PACKAGE_SPEC: +caliper+gpu~legacy-unit+nmodl+openmp~shared+sympy+tests~unified build_type=Debug # Build NEURON build:neuron:mod2c:intel:shared:debug: extends: [.build_neuron, .spack_intel] needs: ["build:coreneuron:mod2c:intel:shared:debug"] build:neuron:nmodl:intel:debug:legacy: extends: [.build_neuron, .spack_intel] needs: ["build:coreneuron:nmodl:intel:debug:legacy"] build:neuron:nmodl:intel:shared:debug: extends: [.build_neuron, .spack_intel] needs: ["build:coreneuron:nmodl:intel:shared:debug"] build:neuron:mod2c:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] build:neuron:nmodl:nvhpc:acc:debug:legacy: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] build:neuron:nmodl:nvhpc:acc:shared: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] build:neuron:nmodl:nvhpc:omp:legacy: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] build:neuron:nmodl:nvhpc:omp:debug: extends: [.build_neuron, .spack_nvhpc] needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test CoreNEURON test:coreneuron:mod2c:intel:shared:debug: extends: [.ctest] needs: ["build:coreneuron:mod2c:intel:shared:debug"] test:coreneuron:nmodl:intel:debug:legacy: extends: [.ctest] needs: ["build:coreneuron:nmodl:intel:debug:legacy"] 
test:coreneuron:nmodl:intel:shared:debug: extends: [.ctest] needs: ["build:coreneuron:nmodl:intel:shared:debug"] test:coreneuron:mod2c:nvhpc:acc:debug:unified: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:debug:unified"] test:coreneuron:mod2c:nvhpc:acc:shared: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:mod2c:nvhpc:acc:shared"] test:coreneuron:nmodl:nvhpc:acc:debug:legacy: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:acc:debug:legacy"] test:coreneuron:nmodl:nvhpc:acc:shared: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:acc:shared"] test:coreneuron:nmodl:nvhpc:omp:legacy: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:omp:legacy"] test:coreneuron:nmodl:nvhpc:omp:debug: extends: [.ctest, .gpu_node] needs: ["build:coreneuron:nmodl:nvhpc:omp:debug"] # Test NEURON test:neuron:mod2c:intel:shared:debug: extends: [.test_neuron] needs: ["build:neuron:mod2c:intel:shared:debug"] test:neuron:nmodl:intel:debug:legacy: extends: [.test_neuron] needs: ["build:neuron:nmodl:intel:debug:legacy"] test:neuron:nmodl:intel:shared:debug: extends: [.test_neuron] needs: ["build:neuron:nmodl:intel:shared:debug"] test:neuron:mod2c:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:mod2c:nvhpc:acc:shared"] test:neuron:nmodl:nvhpc:acc:debug:legacy: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:acc:debug:legacy"] test:neuron:nmodl:nvhpc:acc:shared: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:acc:shared"] test:neuron:nmodl:nvhpc:omp:legacy: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:omp:legacy"] test:neuron:nmodl:nvhpc:omp:debug: extends: [.test_neuron, .gpu_node] needs: ["build:neuron:nmodl:nvhpc:omp:debug"] ================================================ FILE: .gitmodules ================================================ [submodule "external/mod2c"] path = external/mod2c url = 
https://github.com/BlueBrain/mod2c [submodule "external/CLI11"] path = external/CLI11 url = https://github.com/CLIUtils/CLI11.git [submodule "external/nmodl"] path = external/nmodl url = https://github.com/BlueBrain/nmodl [submodule "external/Random123"] path = external/Random123 url = https://github.com/BlueBrain/Random123.git [submodule "CMake/hpc-coding-conventions"] path = CMake/hpc-coding-conventions url = https://github.com/BlueBrain/hpc-coding-conventions.git ================================================ FILE: .readthedocs.yml ================================================ version: 2 conda: environment: docs/conda_environment.yml python: install: - requirements: docs/docs_requirements.txt ================================================ FILE: .sanitizers/undefined.supp ================================================ unsigned-integer-overflow:_philox4x32bumpkey(r123array2x32) unsigned-integer-overflow:coreneuron::TNode::mkhash() unsigned-integer-overflow:std::mersenne_twister_engine ================================================ FILE: AUTHORS.txt ================================================ Akiko Sato Aleksandr Ovcharenko Alessandro Cattabiani Alexander Dietz Alexandru Săvulescu Antonio Bellotta Baudouin Del Marmol Bruno Magalhaes Christos Kotsalos Fabien Delalondre Felix Schuermann (contributor) Fernando Pereira Francesco Cremonesi Ioannis Magkanaris James Gonzalo King Jeremy Fouriaux Jorge Blanco Alonso Kai Langen Michael Lee Hines Nicolas Cornu Olli Lupton Omar Awile Oren Amsalem Pramod Shivaji Kumbhar (maintainer) Sam Yates Sergio Rivas-Gomez Tapasweni Pathak Weina Ji viniciusdepadua ================================================ FILE: CMake/AddHpcCodingConvSubmodule.cmake ================================================ # ============================================================================= # Copyright (C) 2016-2021 Blue Brain Project # # See top-level LICENSE file for details. 
# =============================================================================

# Locate the hpc-coding-conventions git submodule and, when its checkout is missing, try to fetch it
# with `git submodule update --init`.
include(FindPackageHandleStandardArgs)
# NOTE(review): this asks for a package literally named "FindPkgConfig"; pkg-config support is
# normally loaded with find_package(PkgConfig) -- confirm whether this call is still needed.
find_package(FindPkgConfig QUIET)

# setup.cfg is used as the marker file showing that the submodule has been checked out
find_path(
  HpcCodingConv_PROJ
  NAMES setup.cfg
  PATHS "${CORENEURON_PROJECT_SOURCE_DIR}/CMake/hpc-coding-conventions/")

# Sets HpcCodingConv_FOUND when the marker file was located
find_package_handle_standard_args(HpcCodingConv REQUIRED_VARS HpcCodingConv_PROJ)

if(NOT HpcCodingConv_FOUND)
  find_package(Git 1.8.3 QUIET)
  if(NOT GIT_FOUND)
    message(FATAL_ERROR "git not found, clone repository with --recursive")
  endif()
  message(
    STATUS
      "Sub-module CMake/hpc-coding-conventions missing: running git submodule update --init")
  execute_process(
    COMMAND ${GIT_EXECUTABLE} submodule update --init --
            ${CORENEURON_PROJECT_SOURCE_DIR}/CMake/hpc-coding-conventions
    WORKING_DIRECTORY ${CORENEURON_PROJECT_SOURCE_DIR}
    RESULT_VARIABLE HpcCodingConv_GIT_RESULT)
  # Report a failed update instead of silently carrying on; configuration still continues so that
  # set-ups which previously worked are not broken by this check.
  if(NOT "${HpcCodingConv_GIT_RESULT}" STREQUAL "0")
    message(
      WARNING
        "git submodule update failed for CMake/hpc-coding-conventions (result: ${HpcCodingConv_GIT_RESULT})"
    )
  endif()
endif()

================================================
FILE: CMake/AddMod2cSubmodule.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# Locate the mod2c git submodule, fetch it with `git submodule update --init --recursive` when its
# checkout is missing, and add it to the build.

# find_package_handle_standard_args() is used below, so load the module that defines it rather than
# relying on another file having included it first.
include(FindPackageHandleStandardArgs)
find_package(FindPkgConfig QUIET)

# CMakeLists.txt is used as the marker file showing that the submodule has been checked out
find_path(
  MOD2C_PROJ
  NAMES CMakeLists.txt
  PATHS "${CORENEURON_PROJECT_SOURCE_DIR}/external/mod2c")

# Sets MOD2C_FOUND when the marker file was located
find_package_handle_standard_args(MOD2C REQUIRED_VARS MOD2C_PROJ)

if(NOT MOD2C_FOUND)
  find_package(Git 1.8.3 QUIET)
  if(NOT GIT_FOUND)
    message(FATAL_ERROR "git not found, clone repository with --recursive")
  endif()
  message(STATUS "Sub-module mod2c missing : running git submodule update --init --recursive")
  execute_process(
    COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive --
            ${CORENEURON_PROJECT_SOURCE_DIR}/external/mod2c
    WORKING_DIRECTORY ${CORENEURON_PROJECT_SOURCE_DIR}
    RESULT_VARIABLE MOD2C_GIT_RESULT)
  # Report a failed update explicitly so that the reason is visible before add_subdirectory() below
  # is attempted.
  if(NOT "${MOD2C_GIT_RESULT}" STREQUAL "0")
    message(WARNING "git submodule update failed for external/mod2c (result: ${MOD2C_GIT_RESULT})")
  endif()
else()
  message(STATUS "Using mod2c submodule from ${MOD2C_PROJ}")
endif()

add_subdirectory(${CORENEURON_PROJECT_SOURCE_DIR}/external/mod2c)

================================================
FILE: CMake/AddNmodlSubmodule.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# Locate the nmodl git submodule, fetch it with `git submodule update --init` when its checkout is
# missing, and add it to the build.

# find_package_handle_standard_args() is used below, so load the module that defines it rather than
# relying on another file having included it first.
include(FindPackageHandleStandardArgs)
find_package(FindPkgConfig QUIET)

# CMakeLists.txt is used as the marker file showing that the submodule has been checked out
find_path(
  NMODL_PROJ
  NAMES CMakeLists.txt
  PATHS "${CORENEURON_PROJECT_SOURCE_DIR}/external/nmodl")

# Sets NMODL_FOUND when the marker file was located
find_package_handle_standard_args(NMODL REQUIRED_VARS NMODL_PROJ)

if(NOT NMODL_FOUND)
  find_package(Git 1.8.3 QUIET)
  if(NOT GIT_FOUND)
    message(FATAL_ERROR "git not found, clone repository with --recursive")
  endif()
  message(STATUS "Sub-module nmodl missing : running git submodule update --init")
  execute_process(
    COMMAND ${GIT_EXECUTABLE} submodule update --init --
            ${CORENEURON_PROJECT_SOURCE_DIR}/external/nmodl
    WORKING_DIRECTORY ${CORENEURON_PROJECT_SOURCE_DIR}
    RESULT_VARIABLE NMODL_GIT_RESULT)
  # Report a failed update explicitly so that the reason is visible before add_subdirectory() below
  # is attempted.
  if(NOT "${NMODL_GIT_RESULT}" STREQUAL "0")
    message(WARNING "git submodule update failed for external/nmodl (result: ${NMODL_GIT_RESULT})")
  endif()
else()
  message(STATUS "Using nmodl submodule from ${NMODL_PROJ}")
endif()

add_subdirectory(${CORENEURON_PROJECT_SOURCE_DIR}/external/nmodl)

================================================
FILE: CMake/AddRandom123Submodule.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# Locate the Random123 git submodule and, when its checkout is missing, try to fetch it with
# `git submodule update --init --recursive`.
include(FindPackageHandleStandardArgs)
find_package(FindPkgConfig QUIET)

# LICENSE is used as the marker file; the NO_* options restrict the search to the PATHS entry below
find_path(
  Random123_PROJ
  NAMES LICENSE
  PATHS "${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123"
  NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_SYSTEM_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH)

# Sets Random123_FOUND when the marker file was located
find_package_handle_standard_args(Random123 REQUIRED_VARS Random123_PROJ)

if(NOT Random123_FOUND)
  find_package(Git 1.8.3 QUIET)
  if(NOT GIT_FOUND)
    message(FATAL_ERROR "git not found, clone repository with --recursive")
  endif()
  message(STATUS "Sub-module Random123 missing: running git submodule update --init --recursive")
  execute_process(
    COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive --
            ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123
    WORKING_DIRECTORY ${CORENEURON_PROJECT_SOURCE_DIR}
    RESULT_VARIABLE Random123_GIT_RESULT)
  # Report a failed update instead of silently carrying on; configuration still continues so that
  # set-ups which previously worked are not broken by this check.
  if(NOT "${Random123_GIT_RESULT}" STREQUAL "0")
    message(
      WARNING
        "git submodule update failed for external/Random123 (result: ${Random123_GIT_RESULT})")
  endif()
endif()

================================================
FILE: CMake/CrayPortability.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# The presence of /opt/cray is used as the indicator that we are building on a Cray system
if(IS_DIRECTORY "/opt/cray")
  set(CRAY_SYSTEM TRUE)
endif()

if(CRAY_SYSTEM)
  # default build type is static for cray
  if(NOT DEFINED COMPILE_LIBRARY_TYPE)
    set(COMPILE_LIBRARY_TYPE "STATIC")
  endif()

  # The Cray wrapper takes care of everything, so the explicit MPI library lists are cleared
  set(MPI_LIBRARIES "")
  set(MPI_C_LIBRARIES "")
  set(MPI_CXX_LIBRARIES "")

  # ~~~
  # instead of -rdynamic, cray wrapper needs either -dynamic or -static(default)
  # also cray compiler needs fPIC flag
  # ~~~
  if(COMPILE_LIBRARY_TYPE STREQUAL "SHARED")
    set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "-dynamic")

    # TODO: add Cray compiler flag configurations in CompilerFlagsHelpers.cmake
    # NOTE(review): CMAKE_C_COMPILER_IS_CRAY is not set anywhere in this file; confirm that the
    # module defining it is included before this point and that it covers the C language.
    if(CMAKE_C_COMPILER_IS_CRAY)
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
    endif()

  else()
    set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
  endif()

else()
  # default is shared library
  if(NOT DEFINED COMPILE_LIBRARY_TYPE)
    set(COMPILE_LIBRARY_TYPE "SHARED")
  endif()
endif()

================================================
FILE: CMake/GitRevision.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# For now use a simple approach to get version information, as git is often
# available on the machine where we are building from source.
# Result: CN_GIT_REVISION is "<short sha1> (<commit date>)", or "unknown" when
# the revision cannot be determined.
# ~~~

find_package(Git)

# Fallback used when git is missing or the source tree is not a git checkout
set(CN_GIT_REVISION "unknown")

if(GIT_FOUND)
  # get last commit sha1
  execute_process(
    COMMAND ${GIT_EXECUTABLE} -c log.showSignature=false log -1 --format=%h
    WORKING_DIRECTORY ${CORENEURON_PROJECT_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_REVISION_SHA1
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
  # get last commit date
  execute_process(
    COMMAND ${GIT_EXECUTABLE} -c log.showSignature=false show -s --format=%ci
    WORKING_DIRECTORY ${CORENEURON_PROJECT_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_REVISION_DATE
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
  # git can be installed while the sources are not a checkout (e.g. an unpacked archive); both
  # commands then print nothing, which previously produced the meaningless revision " ()".
  if(NOT "${GIT_REVISION_SHA1}" STREQUAL "")
    set(CN_GIT_REVISION "${GIT_REVISION_SHA1} (${GIT_REVISION_DATE})")
  endif()
endif()

================================================
FILE: CMake/MakefileBuildOptions.cmake
================================================
# 
=============================================================================
# Copyright (C) 2016-2022 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# =============================================================================
# NMODL CLI options : common and backend specific
# =============================================================================
# ~~~
# Inlining is always requested. If the user supplied CORENRN_NMODL_FLAGS, those
# flags are appended to the common arguments (they do not replace the default).
# ~~~
set(NMODL_COMMON_ARGS "passes --inline")

if(NOT "${CORENRN_NMODL_FLAGS}" STREQUAL "")
  string(APPEND NMODL_COMMON_ARGS " ${CORENRN_NMODL_FLAGS}")
endif()

# Backend-specific argument strings: one for the CPU (host) backend, one for the OpenACC backend
set(NMODL_CPU_BACKEND_ARGS "host --c")
set(NMODL_ACC_BACKEND_ARGS "host --c acc --oacc")

# =============================================================================
# Construct the linker arguments that are used inside nrnivmodl-core (to build libcorenrnmech from
# libcoreneuron-core, libcoreneuron-cuda and mechanism object files) and inside nrnivmodl (to link
# NEURON's special against CoreNEURON's libcorenrnmech). These are stored in two global properties:
# CORENRN_LIB_LINK_FLAGS (used by NEURON/nrnivmodl to link special against CoreNEURON) and
# CORENRN_LIB_LINK_DEP_FLAGS (used by CoreNEURON/nrnivmodl-core to link libcorenrnmech.so).
# Conceptually: CORENRN_LIB_LINK_FLAGS = -lcorenrnmech $CORENRN_LIB_LINK_DEP_FLAGS
# =============================================================================
# In non-shared builds -lcorenrnmech is wrapped in --whole-archive / --no-whole-archive
if(NOT CORENRN_ENABLE_SHARED)
  set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--whole-archive")
endif()
set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -lcorenrnmech")
if(NOT CORENRN_ENABLE_SHARED)
  set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -Wl,--no-whole-archive")
endif()

# Essentially we "just" want to unpack the CMake dependencies of the `coreneuron-core` target into a
# plain string that we can bake into the Makefiles in both NEURON and CoreNEURON.

# ~~~
# coreneuron_process_library_path(library)
#
# Append the link flag(s) for one library to the CORENRN_LIB_LINK_DEP_FLAGS global property.
# `library` is either a bare library name (no directory component) or a path to a library file.
# ~~~
function(coreneuron_process_library_path library)
  get_filename_component(library_dir "${library}" DIRECTORY)
  if(NOT library_dir)
    # In case target is not a target but is just the name of a library, e.g. "dl"
    set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${library}")
  elseif("${library_dir}" MATCHES "^(/lib|/lib64|/usr/lib|/usr/lib64)$")
    # e.g. /usr/lib64/libpthread.so -> -lpthread TODO: consider using
    # https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_IMPLICIT_LINK_DIRECTORIES.html, or
    # dropping this special case entirely
    get_filename_component(libname ${library} NAME_WE)
    # Strip the leading "lib" so that the remainder can be passed to -l
    string(REGEX REPLACE "^lib" "" libname ${libname})
    set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${libname}")
  else()
    # It's a full path, include that on the line (together with an rpath entry for its directory)
    set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS
                                               " -Wl,-rpath,${library_dir} ${library}")
  endif()
endfunction()

# ~~~
# coreneuron_process_target(target)
#
# If `target` is a CMake target: for every target except coreneuron-core itself, add its interface
# include directories as -I flags to the CORENRN_EXTRA_COMPILE_FLAGS global property and add the
# library to CORENRN_LIB_LINK_DEP_FLAGS; then recurse into everything listed in its LINK_LIBRARIES.
# If `target` is not a CMake target it is handed to coreneuron_process_library_path().
# ~~~
function(coreneuron_process_target target)
  if(TARGET ${target})
    if(NOT target STREQUAL "coreneuron-core")
      # This is a special case: libcoreneuron-core.a is manually unpacked into .o files by the
      # nrnivmodl-core Makefile, so we do not want to also emit an -lcoreneuron-core argument.
      get_target_property(target_inc_dirs ${target} INTERFACE_INCLUDE_DIRECTORIES)
      if(target_inc_dirs)
        foreach(inc_dir_genex ${target_inc_dirs})
          # Generator expressions are stripped; entries that are empty afterwards are skipped
          string(GENEX_STRIP "${inc_dir_genex}" inc_dir)
          if(inc_dir)
            set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_EXTRA_COMPILE_FLAGS " -I${inc_dir}")
          endif()
        endforeach()
      endif()
      get_target_property(target_imported ${target} IMPORTED)
      if(target_imported)
        # In this case we can extract the full path to the library
        get_target_property(target_location ${target} LOCATION)
        coreneuron_process_library_path(${target_location})
      else()
        # This is probably another of our libraries, like -lcoreneuron-cuda. We might need to add -L
        # and an RPATH later.
        set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_DEP_FLAGS " -l${target}")
      endif()
    endif()
    # Recurse into the dependencies of this target
    get_target_property(target_libraries ${target} LINK_LIBRARIES)
    if(target_libraries)
      foreach(child_target ${target_libraries})
        coreneuron_process_target(${child_target})
      endforeach()
    endif()
    # Targets are fully handled above; do not fall through to the plain-library handling
    return()
  endif()
  coreneuron_process_library_path("${target}")
endfunction()

# Collect the dependency flags starting from coreneuron-core, then append them to the flags that
# already contain -lcorenrnmech
coreneuron_process_target(coreneuron-core)
get_property(CORENRN_LIB_LINK_DEP_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_DEP_FLAGS)
set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " ${CORENRN_LIB_LINK_DEP_FLAGS}")
# In static builds then NEURON uses dlopen(nullptr, ...) to look for the corenrn_embedded_run
# symbol, which comes from libcoreneuron-core.a and gets included in libcorenrnmech.
if(NOT CORENRN_ENABLE_SHARED)
  set_property(GLOBAL APPEND_STRING PROPERTY CORENRN_LIB_LINK_FLAGS " -rdynamic")
endif()
# Copy the final global property values into ordinary variables of the same names
get_property(CORENRN_EXTRA_COMPILE_FLAGS GLOBAL PROPERTY CORENRN_EXTRA_COMPILE_FLAGS)
get_property(CORENRN_LIB_LINK_FLAGS GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS)

# Detect if --start-group and --end-group are valid linker arguments. These are typically needed
# when linking mutually-dependent .o files (or where we don't know the correct order) on Linux, but
# they are not needed *or* recognised by the macOS linker.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
  # From CMake 3.18 on, ask the linker directly
  include(CheckLinkerFlag)
  check_linker_flag(CXX -Wl,--start-group CORENRN_CXX_LINKER_SUPPORTS_START_GROUP)
elseif(CMAKE_SYSTEM_NAME MATCHES Linux)
  # Assume that --start-group and --end-group are only supported on Linux
  set(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP ON)
endif()
# The two variables below stay undefined when the flags are not supported
if(CORENRN_CXX_LINKER_SUPPORTS_START_GROUP)
  set(CORENEURON_LINKER_START_GROUP -Wl,--start-group)
  set(CORENEURON_LINKER_END_GROUP -Wl,--end-group)
endif()

# Things that used to be in CORENRN_LIB_LINK_FLAGS: -lrt -L${CMAKE_HOST_SYSTEM_PROCESSOR}
# -L${caliper_LIB_DIR} -l${CALIPER_LIB}

# =============================================================================
# Turn CORENRN_COMPILE_DEFS into a list of -DFOO[=BAR] options.
# =============================================================================
list(TRANSFORM CORENRN_COMPILE_DEFS PREPEND -D OUTPUT_VARIABLE CORENRN_COMPILE_DEF_FLAGS)

# =============================================================================
# Extra link flags that we need to include when linking libcorenrnmech.{a,so} in CoreNEURON but that
# do not need to be passed to NEURON to use when linking nrniv/special (why?)
# =============================================================================
string(JOIN " " CORENRN_COMMON_LDFLAGS ${CORENRN_LIB_LINK_DEP_FLAGS} ${CORENRN_EXTRA_LINK_FLAGS})
# Add an rpath entry for the sanitizer runtime directory when one is configured
if(CORENRN_SANITIZER_LIBRARY_DIR)
  string(APPEND CORENRN_COMMON_LDFLAGS " -Wl,-rpath,${CORENRN_SANITIZER_LIBRARY_DIR}")
endif()
# Flatten the sanitizer environment list into a single space-separated string
string(JOIN " " CORENRN_SANITIZER_ENABLE_ENVIRONMENT_STRING ${CORENRN_SANITIZER_ENABLE_ENVIRONMENT})

# =============================================================================
# compile flags : common to all backend
# =============================================================================
# CMAKE_CXX_FLAGS_<CONFIG> variables are named with the upper-case build type
string(TOUPPER "${CMAKE_BUILD_TYPE}" _BUILD_TYPE)
string(
  JOIN
  " "
  CORENRN_CXX_FLAGS
  ${CMAKE_CXX_FLAGS}
  ${CMAKE_CXX_FLAGS_${_BUILD_TYPE}}
  ${CMAKE_CXX17_STANDARD_COMPILE_OPTION}
  ${NVHPC_ACC_COMP_FLAGS}
  ${NVHPC_CXX_INLINE_FLAGS}
  ${CORENRN_COMPILE_DEF_FLAGS}
  ${CORENRN_EXTRA_MECH_CXX_FLAGS}
  ${CORENRN_EXTRA_COMPILE_FLAGS})

# =============================================================================
# nmodl/mod2c related options : TODO
# =============================================================================
# name of nmodl/mod2c binary (file name component of CORENRN_MOD2CPP_BINARY)
get_filename_component(nmodl_name ${CORENRN_MOD2CPP_BINARY} NAME)
set(nmodl_binary_name ${nmodl_name})

================================================
FILE: CMake/OpenAccHelper.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# cnrn_parse_version(<full_version> OUTPUT_MAJOR_MINOR <variable>)
#
# Helper to parse X.Y[.anything] into X.Y. The result is stored in <variable> in the caller's
# scope. A bare X.Y is returned unchanged. It is a fatal error if <full_version> contains no dot,
# if nothing follows the first dot, or if unexpected extra arguments are given.
# ~~~
function(cnrn_parse_version FULL_VERSION)
  cmake_parse_arguments(PARSE_ARGV 1 CNRN_PARSE_VERSION "" "OUTPUT_MAJOR_MINOR" "")
  if(NOT "${CNRN_PARSE_VERSION_UNPARSED_ARGUMENTS}" STREQUAL "")
    message(
      FATAL_ERROR
        "cnrn_parse_version got unexpected arguments: ${CNRN_PARSE_VERSION_UNPARSED_ARGUMENTS}")
  endif()
  # Position of the dot separating major from minor; arguments are quoted so that an empty version
  # string reaches the error message below instead of breaking the string() call itself
  string(FIND "${FULL_VERSION}" "." first_dot)
  if(first_dot EQUAL -1)
    message(FATAL_ERROR "Failed to parse major.minor from ${FULL_VERSION}")
  endif()
  math(EXPR first_dot_plus_one "${first_dot}+1")
  string(SUBSTRING "${FULL_VERSION}" ${first_dot_plus_one} -1 minor_and_later)
  if("${minor_and_later}" STREQUAL "")
    # Something like "11." has no minor component at all
    message(FATAL_ERROR "Failed to parse major.minor from ${FULL_VERSION}")
  endif()
  # Position of the second dot, relative to the start of the minor component
  string(FIND "${minor_and_later}" "." second_dot_relative)
  if(second_dot_relative EQUAL -1)
    # No patch component: the input already is X.Y
    set(major_minor "${FULL_VERSION}")
  else()
    # Number of characters up to, but not including, the second dot
    math(EXPR major_minor_length "${first_dot}+${second_dot_relative}+1")
    string(SUBSTRING "${FULL_VERSION}" 0 ${major_minor_length} major_minor)
  endif()
  set(${CNRN_PARSE_VERSION_OUTPUT_MAJOR_MINOR}
      "${major_minor}"
      PARENT_SCOPE)
endfunction()

# =============================================================================
# Prepare compiler flags for GPU target
# =============================================================================
if(CORENRN_ENABLE_GPU)
  # Get the NVC++ version number for use in nrnivmodl_core_makefile.in
  cnrn_parse_version(${CMAKE_CXX_COMPILER_VERSION} OUTPUT_MAJOR_MINOR
                     CORENRN_NVHPC_MAJOR_MINOR_VERSION)
  # Enable cudaProfiler{Start,Stop}() behind the Instrumentor::phase... APIs
  list(APPEND CORENRN_COMPILE_DEFS CORENEURON_CUDA_PROFILING CORENEURON_ENABLE_GPU)
  # Plain C++ code in CoreNEURON may need to use CUDA runtime APIs for, for example, starting and
  # stopping profiling. This makes sure those headers can be found.
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
  # cuda unified memory support
  if(CORENRN_ENABLE_CUDA_UNIFIED_MEMORY)
    list(APPEND CORENRN_COMPILE_DEFS CORENEURON_UNIFIED_MEMORY)
  endif()
  # Determine the CUDA version as X.Y; it is stored in CORENRN_CUDA_VERSION_SHORT
  if(${CMAKE_VERSION} VERSION_LESS 3.17)
    # Hopefully we can drop this soon. Parse ${CMAKE_CUDA_COMPILER_VERSION} into a shorter X.Y
    # version without any patch version.
    if(NOT ${CMAKE_CUDA_COMPILER_ID} STREQUAL "NVIDIA")
      message(FATAL_ERROR "Unsupported CUDA compiler ${CMAKE_CUDA_COMPILER_ID}")
    endif()
    cnrn_parse_version(${CMAKE_CUDA_COMPILER_VERSION} OUTPUT_MAJOR_MINOR
                       CORENRN_CUDA_VERSION_SHORT)
  else()
    # This is a lazy way of getting the major/minor versions separately without parsing
    # ${CMAKE_CUDA_COMPILER_VERSION}
    find_package(CUDAToolkit 9.0 REQUIRED)
    # Be a bit paranoid
    if(NOT ${CMAKE_CUDA_COMPILER_VERSION} STREQUAL ${CUDAToolkit_VERSION})
      message(
        FATAL_ERROR
          "CUDA compiler (${CMAKE_CUDA_COMPILER_VERSION}) and toolkit (${CUDAToolkit_VERSION}) versions are not the same!"
      )
    endif()
    set(CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
  endif()
  # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code
  # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code
  # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in
  # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP
  # code is compiled with the same CUDA version as the explicit CUDA code.
  set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT}")
  # Combining -gpu=lineinfo with -O0 -g gives a warning: Conflicting options --device-debug and
  # --generate-line-info specified, ignoring --generate-line-info option
  # Note: the sub-options below are appended with a leading comma, so they extend the -gpu= list.
  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    string(APPEND NVHPC_ACC_COMP_FLAGS ",debug")
  else()
    string(APPEND NVHPC_ACC_COMP_FLAGS ",lineinfo")
  endif()
  # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
  # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
  # same default compute capabilities as each other, particularly on GPU-less build machines.
  foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES})
    string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}")
  endforeach()
  # From here on flags are appended with a leading space, i.e. as separate compiler options
  if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
    # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
    # for a region then prefer OpenMP.
    list(APPEND CORENRN_COMPILE_DEFS CORENEURON_PREFER_OPENMP_OFFLOAD)
    string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu")
  elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC")
    # Only enable OpenACC offload for GPU
    string(APPEND NVHPC_ACC_COMP_FLAGS " -acc")
  else()
    message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
  endif()
  # The same flags are also used when linking executables
  string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
  # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
  # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
  # Use of `-Mautoinline` ensures that the necessary functions like `net_receive_kernel` are inlined
  # for OpenACC code generation.
  set(NVHPC_CXX_INLINE_FLAGS "-Mautoinline")
endif()

# =============================================================================
# Initialise global properties that will be used by NEURON to link with CoreNEURON
# =============================================================================
if(CORENRN_ENABLE_GPU)
  # CORENRN_LIB_LINK_FLAGS is the full set of flags needed to link against libcorenrnmech.so:
  # something like `-acc -lcorenrnmech ...`. CORENRN_NEURON_LINK_FLAGS only contains flags that need
  # to be used when linking the NEURON Python module to make sure it is able to dynamically load
  # libcorenrnmech.so.
  # Note: this sets (does not append to) the property, so it defines the start of the flag string.
  set_property(GLOBAL PROPERTY CORENRN_LIB_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}")
  if(CORENRN_ENABLE_SHARED)
    # Because of
    # https://forums.developer.nvidia.com/t/dynamically-loading-an-openacc-enabled-shared-library-from-an-executable-compiled-with-nvc-does-not-work/210968
    # we have to tell NEURON to pass OpenACC flags when linking special, otherwise we end up with an
    # `nrniv` binary that cannot dynamically load CoreNEURON in shared-library builds.
    set_property(GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS "${NVHPC_ACC_COMP_FLAGS}")
  endif()
endif()

# NEURON needs to have access to this when CoreNEURON is built as a submodule. If CoreNEURON is
# installed externally then this is set via coreneuron-config.cmake
set_property(GLOBAL PROPERTY CORENRN_ENABLE_SHARED ${CORENRN_ENABLE_SHARED})

if(CORENRN_HAVE_NVHPC_COMPILER)
  if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 20.7)
    # https://forums.developer.nvidia.com/t/many-all-diagnostic-numbers-increased-by-1-from-previous-values/146268/3
    # changed the numbering scheme in newer versions. The following list is from a clean start 13
    # August 2021. It would clearly be nicer to apply these suppressions only to relevant files.
    # Examples of the suppressed warnings are given below.
    # ~~~
    # "include/Random123/array.h", warning #111-D: statement is unreachable
    # "include/Random123/features/sse.h", warning #550-D: variable "edx" was set but never used
    # ~~~
    set(CORENEURON_CXX_WARNING_SUPPRESSIONS --diag_suppress=111,550)
    # This one can be a bit more targeted
    # ~~~
    # "boost/test/unit_test_log.hpp", warning #612-D: overloaded virtual function "..." is only partially overridden in class "..."
    # ~~~
    set(CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS --diag_suppress=612)
    # Extra suppressions for .cpp files translated from .mod files.
    # ~~~
    # "x86_64/corenrn/mod2c/pattern.cpp", warning #161-D: unrecognized #pragma
    # "x86_64/corenrn/mod2c/svclmp.cpp", warning #177-D: variable "..." was declared but never referenced
    # ~~~
    string(JOIN " " CORENEURON_TRANSLATED_CODE_COMPILE_FLAGS ${CORENEURON_CXX_WARNING_SUPPRESSIONS}
                --diag_suppress=161,177)
  endif()
endif()

================================================
FILE: CMake/TestScriptUtils.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# Utility functions for manipulating test labels and producing
# tests from scripts:
#
# 1. add_test_class(label [label2 ...])
#
#    Create a target with name test-label (or test-label-label2 etc.)
#    which runs only those tests possessing all of the supplied labels.
#
#
# 2. add_test_label(name label ...)
#
#    Add the given labels to the test 'name'.
#
#
# 3. add_test_script(name script interp)
#
#    Add a test 'name' that runs the given script, using the
#    interpreter 'interp'. If no interpreter is supplied,
#    the script will be run with /bin/sh.
#
#    Uses the following variables to customize the new test:
#    * TEST_LABEL, ${NAME}_TEST_LABEL
#      If defined, apply the label(s) in these variables to the
#      new test.
#    * TEST_ARGS, ${NAME}_TEST_ARGS
#      Additional arguments to pass to the script.
#      ${NAME}_TEST_ARGS takes priority over TEST_ARGS.
#    * TEST_ENVIRONMENT
#      Additional environment variables to define for the test;
#      added to test properties.
#    * TEST_PREFIX, ${NAME}_TEST_PREFIX
#      If defined, preface the interpreter with this prefix.
#      ${NAME}_TEST_PREFIX takes priority over TEST_PREFIX.
# ~~~

# Append the given labels to the LABELS property of test NAME and make sure that a `test-<label>`
# target exists for each of them.
function(add_test_label NAME)
  set_property(
    TEST ${NAME}
    APPEND
    PROPERTY LABELS ${ARGN})
  # create test classes for each label
  foreach(L ${ARGN})
    add_test_class(${L})
  endforeach()
endfunction()

# ~~~
# add_test_script(NAME SCRIPT [INTERP])
#
# Register test NAME that runs SCRIPT with the interpreter INTERP. INTERP may be omitted or
# empty, in which case /bin/sh is used. A relative SCRIPT is resolved against
# CMAKE_CURRENT_SOURCE_DIR; the test runs in CMAKE_CURRENT_BINARY_DIR.
# ~~~
function(add_test_script NAME SCRIPT)
  # The interpreter is an optional third argument. It used to be a required named parameter, which
  # made CMake reject two-argument calls before the /bin/sh default below could apply. ARGV2 is
  # only read when it was actually passed.
  if(ARGC GREATER 2)
    set(INTERP "${ARGV2}")
  else()
    set(INTERP "")
  endif()

  # The test-specific prefix, when set, wins over the global one
  set(RUN_PREFIX ${TEST_PREFIX})
  if(${NAME}_TEST_PREFIX)
    set(RUN_PREFIX ${${NAME}_TEST_PREFIX})
  endif()

  if(NOT INTERP)
    set(INTERP "/bin/sh")
  endif()

  # The test-specific arguments, when set, win over the global ones
  set(RUN_ARGS ${TEST_ARGS})
  if(${NAME}_TEST_ARGS)
    set(RUN_ARGS ${${NAME}_TEST_ARGS})
  endif()

  set(SCRIPT_PATH "${SCRIPT}")
  if(NOT IS_ABSOLUTE "${SCRIPT_PATH}")
    set(SCRIPT_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${SCRIPT_PATH}")
  endif()

  add_test(
    NAME ${NAME}
    COMMAND ${RUN_PREFIX} ${INTERP} "${SCRIPT_PATH}" ${RUN_ARGS}
    WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}")

  # Add test labels: global and test-specific labels are both applied
  set(TEST_LABELS ${TEST_LABEL} ${${NAME}_TEST_LABEL})
  if(TEST_LABELS)
    add_test_label(${NAME} ${TEST_LABELS})
  endif()

  if(TEST_ENVIRONMENT)
    set_property(TEST ${NAME} PROPERTY ENVIRONMENT ${TEST_ENVIRONMENT})
  endif()
endfunction()

# Create, unless it already exists, a custom target named `test-<label>[-<label2>...]` that runs
# ctest with one `-L ^<label>$` filter per label given as argument.
function(add_test_class)
  # Labels joined with "-" form the target name suffix
  string(REPLACE ";" "-" TEST_SUFFIX "${ARGN}")
  # Between two labels: close the current regex, start a new -L option and open the next regex
  string(REPLACE ";" "$$;-L;^" TEST_LOPTS "${ARGN}")
  if(NOT TARGET test-${TEST_SUFFIX})
    add_custom_target(
      "test-${TEST_SUFFIX}"
      COMMAND ${CMAKE_CTEST_COMMAND} -L ^${TEST_LOPTS}$$
      WORKING_DIRECTORY ${${PROJECT_NAME}_BINARY_DIR}
      COMMENT "Running all ${ARGN} tests")
  endif()
endfunction()

================================================
FILE: CMake/config/CompilerFlagsHelpers.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain
Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# CompilerFlagsHelpers.cmake
# set of Convenience functions for portable compiler flags
#
# For every language in SUPPORTED_COMPILER_LANGUAGE_LIST this module sets one
# CMAKE_<LANG>_COMPILER_IS_<ID> variable describing the detected compiler and a
# family of CMAKE_<LANG>_* variables (WARNING_ALL, DEBUGINFO_FLAGS, OPT_NONE,
# OPT_NORMAL, OPT_AGGRESSIVE, OPT_FASTEST, STACK_PROTECTION, ...) holding the
# matching flags for that compiler.
# ~~~

set(SUPPORTED_COMPILER_LANGUAGE_LIST "CXX")

# detect compiler
foreach(COMPILER_LANGUAGE ${SUPPORTED_COMPILER_LANGUAGE_LIST})
  if(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "XL")
    set(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_XLC ON)
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "Intel")
    set(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_ICC ON)
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "MSVC")
    # The value must be given explicitly: set(<var>) without a value unsets the variable, which
    # meant the MSVC branch further down could never be selected.
    set(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_MSVC ON)
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "Clang")
    set(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_CLANG ON)
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "GNU")
    set(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_GCC ON)
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "Cray")
    set(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_CRAY ON)
  endif()
endforeach()

foreach(COMPILER_LANGUAGE ${SUPPORTED_COMPILER_LANGUAGE_LIST})
  # XLC compiler
  if(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_XLC)
    # ~~~
    # XLC -qinfo=all is awfully verbose on any platforms that use the GNU STL
    # Enable by default only the relevant one
    # ~~~
    set(CMAKE_${COMPILER_LANGUAGE}_WARNING_ALL "-qformat=all -qinfo=lan:trx:ret:zea:cmp:ret")

    set(CMAKE_${COMPILER_LANGUAGE}_DEBUGINFO_FLAGS "-g")

    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NONE "-O0")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NORMAL "-O2")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE "-O3")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_FASTEST "-O5")

    set(CMAKE_${COMPILER_LANGUAGE}_STACK_PROTECTION "-qstackprotect")

    set(CMAKE_${COMPILER_LANGUAGE}_POSITION_INDEPENDENT "-qpic=small")

    set(CMAKE_${COMPILER_LANGUAGE}_VECTORIZE "-qhot")

    set(ADDITIONAL_THREADSAFE_FLAGS "-qthreaded")

    set(IGNORE_UNKNOWN_PRAGMA_FLAGS "-qsuppress=1506-224")

    # Microsoft compiler
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_MSVC)

    set(CMAKE_${COMPILER_LANGUAGE}_DEBUGINFO_FLAGS "-Zi")

    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NONE "")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NORMAL "-O2")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE "-O2")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_FASTEST "-O2")

    set(CMAKE_${COMPILER_LANGUAGE}_STACK_PROTECTION "-GS")

    # enable by default on MSVC
    set(CMAKE_${COMPILER_LANGUAGE}_POSITION_INDEPENDENT "")

    # GCC
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_GCC)

    set(CMAKE_${COMPILER_LANGUAGE}_WARNING_ALL "-Wall")

    set(CMAKE_${COMPILER_LANGUAGE}_DEBUGINFO_FLAGS "-g")

    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NONE "-O0")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NORMAL "-O2")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE "-O3")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_FASTEST "-Ofast -march=native")

    set(CMAKE_${COMPILER_LANGUAGE}_STACK_PROTECTION "-fstack-protector")

    set(CMAKE_${COMPILER_LANGUAGE}_POSITION_INDEPENDENT "-fPIC")

    set(CMAKE_${COMPILER_LANGUAGE}_VECTORIZE "-ftree-vectorize")

    set(IGNORE_UNKNOWN_PRAGMA_FLAGS "-Wno-unknown-pragmas")

    # Link-time optimisation is only offered for compiler versions above 4.7.0
    if(CMAKE_${COMPILER_LANGUAGE}_COMPILER_VERSION VERSION_GREATER "4.7.0")
      set(CMAKE_${COMPILER_LANGUAGE}_LINK_TIME_OPT "-flto")
    endif()

    if((CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^ppc") OR (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^power"
                                                       ))
      # ppc arch do not support -march= syntax
      set(CMAKE_${COMPILER_LANGUAGE}_GEN_NATIVE "-mcpu=native")
    else()
      set(CMAKE_${COMPILER_LANGUAGE}_GEN_NATIVE "-march=native")
    endif()

    # CLANG
  elseif(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_CLANG)

    set(CMAKE_${COMPILER_LANGUAGE}_WARNING_ALL "-Wall")

    set(CMAKE_${COMPILER_LANGUAGE}_DEBUGINFO_FLAGS "-g")

    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NONE "-O0")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NORMAL "-O2")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE "-O3")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_FASTEST "-Ofast -march=native")

    set(CMAKE_${COMPILER_LANGUAGE}_STACK_PROTECTION "-fstack-protector")

    set(CMAKE_${COMPILER_LANGUAGE}_POSITION_INDEPENDENT "-fPIC")

    # Force same ld behavior as when called from gcc --as-needed forces the linker to check whether
    # a dynamic library mentioned in the command line is actually needed by the objects being
    # linked. Symbols needed in shared objects are already linked when building that library.
    # NOTE(review): these two set() calls replace whatever the linker flag variables held before
    # this point rather than appending to them -- confirm that this is intended.
    set(CMAKE_EXE_LINKER_FLAGS "-Wl,--as-needed")
    set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--as-needed")

    # rest of the world
  else()

    set(CMAKE_${COMPILER_LANGUAGE}_WARNING_ALL "-Wall")

    set(CMAKE_${COMPILER_LANGUAGE}_DEBUGINFO_FLAGS "-g")

    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NONE "-O0")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_NORMAL "-O2")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE "-O3")
    set(CMAKE_${COMPILER_LANGUAGE}_OPT_FASTEST "-O3")

    set(CMAKE_${COMPILER_LANGUAGE}_STACK_PROTECTION "")

    set(CMAKE_${COMPILER_LANGUAGE}_POSITION_INDEPENDENT "-fPIC")

    set(CMAKE_${COMPILER_LANGUAGE}_VECTORIZE "")

    if(CMAKE_${COMPILER_LANGUAGE}_COMPILER_IS_ICC)
      # unknown compiler flags produce error on Cray and hence just set this for intel now
      set(IGNORE_UNKNOWN_PRAGMA_FLAGS "-Wno-unknown-pragmas")
      # Intel O3 is extreme
      set(CMAKE_${COMPILER_LANGUAGE}_OPT_AGGRESSIVE "-O2")
    endif()

    # No "warn all" flag is used with the PGI compiler
    if(CMAKE_${COMPILER_LANGUAGE}_COMPILER_ID STREQUAL "PGI")
      set(CMAKE_${COMPILER_LANGUAGE}_WARNING_ALL "")
    endif()

  endif()
endforeach()

# ===============================================================================
# Allow undefined reference in shared library as mod files will be linked later
# ===============================================================================
if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang" OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
  set(UNDEFINED_SYMBOLS_IGNORE_FLAG "-undefined dynamic_lookup")
  string(APPEND CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS " ${UNDEFINED_SYMBOLS_IGNORE_FLAG}")
  string(APPEND CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS " ${UNDEFINED_SYMBOLS_IGNORE_FLAG}")
endif()

================================================
FILE: CMake/config/ReleaseDebugAutoFlags.cmake
================================================
# 
=============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# ReleaseDebugAutoFlags.cmake
#
# Selects a default build type and maps every build type onto the per-language
# flag variables (CMAKE_<LANG>_OPT_*, CMAKE_<LANG>_DEBUGINFO_FLAGS, ...) that
# are expected to be defined by the CompilerFlagsHelpers module included below.
# ~~~

# Fall back to RelWithDebInfo when neither a single-config build type nor a
# multi-config list of configuration types was given.
if(NOT (CMAKE_BUILD_TYPE OR CMAKE_CONFIGURATION_TYPES))
  set(CMAKE_BUILD_TYPE
      RelWithDebInfo
      CACHE STRING "Choose the type of build." FORCE)
  message(STATUS "Setting build type to '${CMAKE_BUILD_TYPE}' as none was specified.")
endif()

# =============================================================================
# Different build types
# =============================================================================
# ~~~
# Debug          : Optimized for debugging, include debug symbols
# Release        : Release mode, no debuginfo
# RelWithDebInfo : Distribution mode, basic optimizations for portable code with debuginfos
# Fast           : Maximum level of optimization. Target native architecture, not portable code
# ~~~
include(CompilerFlagsHelpers)

# ~~~
# The C and CXX assignments are identical up to the language prefix, so they
# are generated in one loop (C first, then CXX). Note the leading space in the
# FAST flags.
foreach(_lang C CXX)
  set(CMAKE_${_lang}_FLAGS_RELEASE "${CMAKE_${_lang}_OPT_NORMAL}")
  set(CMAKE_${_lang}_FLAGS_DEBUG
      "${CMAKE_${_lang}_DEBUGINFO_FLAGS} ${CMAKE_${_lang}_OPT_NONE} ${CMAKE_${_lang}_STACK_PROTECTION}")
  set(CMAKE_${_lang}_FLAGS_RELWITHDEBINFO
      "${CMAKE_${_lang}_DEBUGINFO_FLAGS} ${CMAKE_${_lang}_OPT_NORMAL}")
  set(CMAKE_${_lang}_FLAGS_FAST
      " ${CMAKE_${_lang}_OPT_FASTEST} ${CMAKE_${_lang}_LINK_TIME_OPT} ${CMAKE_${_lang}_GEN_NATIVE}")
endforeach()
# ~~~

================================================
FILE: CMake/config/SetRpath.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# Use @rpath in the install name of every shared library that gets built.
set(CMAKE_MACOSX_RPATH 1)

# ~~~
# RPATH handling is skipped entirely when CRAY_SYSTEM is set: on platforms like
# bgq, xlc didn't like rpath with static builds, and a similar issue was seen
# on Cray.
# ~~~
if(NOT CRAY_SYSTEM)
  # Keep the full RPATH in the build tree ...
  set(CMAKE_SKIP_BUILD_RPATH FALSE)
  # ... and switch to the install RPATH only at install time, not while building.
  set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)

  # ~~~
  # Append the automatically determined link directories that lie outside the
  # build tree to the install RPATH.
  # ~~~
  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)

  set(LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib")

  # Only add the install lib directory to the RPATH when it is not one of the
  # platform's implicit link directories (list(FIND) yields -1 when absent).
  list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${LIB_INSTALL_DIR}" isSystemDir)
  if(isSystemDir EQUAL -1)
    set(CMAKE_INSTALL_RPATH "${LIB_INSTALL_DIR}")
  endif()
endif()

================================================
FILE: CMake/config/TestHelpers.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# TestHelpers.cmake
#
# Convenience variables for running tests through a launcher. Detects srun
# (SLURM) or mpiexec and exposes the result through the cache variables
# TEST_MPI_EXEC_BIN, TEST_EXEC_PREFIX and TEST_MPI_EXEC_PREFIX.
# ~~~

# enable or disable detection of SLURM and MPIEXEC
option(AUTO_TEST_WITH_SLURM "Add srun as test prefix in a SLURM environment" TRUE)
option(AUTO_TEST_WITH_MPIEXEC "Add mpiexec as test prefix in a MPICH2/OpenMPI environment" TRUE)

# ~~~
# Basic SLURM support: when srun is found it becomes the default prefix of
# every test, MPI or not. For a SLURM test execution, simply run
# "salloc [your_exec_parameters] ctest".
# ~~~
if(AUTO_TEST_WITH_SLURM)
  # A user-provided SLURM_SRUN_COMMAND (even an empty one) disables the search.
  if(NOT DEFINED SLURM_SRUN_COMMAND)
    # find_program() has no QUIET keyword; a trailing QUIET would be parsed as
    # one more HINTS directory, so it is not passed here.
    find_program(
      SLURM_SRUN_COMMAND
      NAMES "srun"
      HINTS "${SLURM_ROOT}/bin")
  endif()
  if(SLURM_SRUN_COMMAND)
    set(TEST_EXEC_PREFIX_DEFAULT "${SLURM_SRUN_COMMAND}")
    set(TEST_MPI_EXEC_PREFIX_DEFAULT "${SLURM_SRUN_COMMAND}")
    set(TEST_MPI_EXEC_BIN_DEFAULT "${SLURM_SRUN_COMMAND}")
    set(TEST_WITH_SLURM ON)
  endif()
endif()

# Basic mpiexec support, will just forward mpiexec as prefix. Only considered
# when srun was not selected above; non-MPI tests get no prefix in this case.
if(AUTO_TEST_WITH_MPIEXEC AND NOT TEST_WITH_SLURM)
  if(NOT DEFINED MPIEXEC)
    find_program(
      MPIEXEC
      NAMES "mpiexec"
      HINTS "${MPI_ROOT}/bin")
  endif()
  if(MPIEXEC)
    set(TEST_MPI_EXEC_PREFIX_DEFAULT "${MPIEXEC}")
    set(TEST_MPI_EXEC_BIN_DEFAULT "${MPIEXEC}")
    set(TEST_WITH_MPIEXEC ON)
  endif()
endif()

# ~~~
# MPI executor program path without arguments used for testing.
# default: srun or mpiexec if found
# ~~~
set(TEST_MPI_EXEC_BIN
    "${TEST_MPI_EXEC_BIN_DEFAULT}"
    CACHE STRING "path of the MPI executor (mpiexec, mpirun) for test execution")

# ~~~
# Test execution prefix. Override this variable for any execution prefix required
# in clustered environment
#
# To specify manually a command with arguments, use the cmake list syntax, e.g.
# -DTEST_EXEC_PREFIX="/usr/bin/srun;-n;4" for a srun execution with 4 tasks
#
# default: srun if found
# ~~~
set(TEST_EXEC_PREFIX
    "${TEST_EXEC_PREFIX_DEFAULT}"
    CACHE STRING "prefix command for the test executions")

# ~~~
# Test execution prefix specific for MPI programs.
#
# To specify manually a command with arguments, use the cmake list syntax, e.g.
# -DTEST_MPI_EXEC_PREFIX="/usr/bin/mpiexec;-n;4" for an MPI execution with 4 ranks
#
# default: srun or mpiexec if found
# ~~~
set(TEST_MPI_EXEC_PREFIX
    "${TEST_MPI_EXEC_PREFIX_DEFAULT}"
    CACHE STRING "prefix command for the MPI test executions")

================================================
FILE: CMake/coreneuron-config.cmake.in
================================================
# =============================================================================
# Copyright (C) 2016-2022 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# coreneuron-config.cmake - package configuration file
#
# Template processed by configure_file(); every @VAR@ below is replaced with the
# value it had when CoreNEURON itself was configured.

# Directory containing this config file; include/lib directories are located
# relative to it.
get_filename_component(CONFIG_PATH "${CMAKE_CURRENT_LIST_FILE}" PATH)

set(CORENRN_VERSION_MAJOR @PROJECT_VERSION_MAJOR@)
set(CORENRN_VERSION_MINOR @PROJECT_VERSION_MINOR@)
set(CORENRN_VERSION_PATCH @PROJECT_VERSION_PATCH@)
set(CORENRN_ENABLE_GPU @CORENRN_ENABLE_GPU@)
set(CORENRN_ENABLE_NMODL @CORENRN_ENABLE_NMODL@)
set(CORENRN_ENABLE_REPORTING @CORENRN_ENABLE_REPORTING@)
set(CORENRN_ENABLE_SHARED @CORENRN_ENABLE_SHARED@)
set(CORENRN_LIB_LINK_FLAGS "@CORENRN_LIB_LINK_FLAGS@")
set(CORENRN_NEURON_LINK_FLAGS "@CORENRN_NEURON_LINK_FLAGS@")

# The source tree ships the umbrella header as coreneuron/coreneuron.hpp; the
# older coreneuron/coreneuron.h name is kept as a fallback so installations that
# still provide it keep resolving to the same directory.
find_path(
  CORENEURON_INCLUDE_DIR
  NAMES "coreneuron/coreneuron.hpp" "coreneuron/coreneuron.h"
  HINTS "${CONFIG_PATH}/../../include")
find_path(
  CORENEURON_LIB_DIR
  NAMES libcorenrnmech.a libcorenrnmech.so libcorenrnmech.dylib
  HINTS "${CONFIG_PATH}/../../lib")

# Exported targets, installed next to this file.
include("${CONFIG_PATH}/coreneuron.cmake")

================================================
FILE: CMake/packages/FindSphinx.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# Locate the sphinx-build executable; the result is cached in SPHINX_EXECUTABLE.
find_program(
  SPHINX_EXECUTABLE
  NAMES sphinx-build
  DOC "/path/to/sphinx-build")

# Reports the outcome of the search and handles REQUIRED / QUIET.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Sphinx "Failed to find sphinx-build executable"
                                  SPHINX_EXECUTABLE)

================================================
FILE: CMake/packages/Findlikwid.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# Findlikwid
# -------------
#
# Find likwid
#
# Find the likwid RRZE Performance Monitoring and Benchmarking Suite
#
# Using likwid:
#
# ::
#   set(LIKWID_DIR "" CACHE PATH "Path likwid performance monitoring and benchmarking suite")
#   find_package(likwid REQUIRED)
#   include_directories(${likwid_INCLUDE_DIRS})
#   target_link_libraries(foo ${likwid_LIBRARIES})
#
# This module sets the following variables:
#
# ::
#
#   likwid_FOUND        - set to true if the library is found
#   likwid_INCLUDE_DIRS - directory containing likwid.h
#   likwid_LIBRARIES    - the likwid library to link against
# ~~~

# LIKWID_DIR, when set, is tried first as an installation prefix.
find_path(
  likwid_INCLUDE_DIRS
  NAMES likwid.h
  HINTS "${LIKWID_DIR}/include")
find_library(
  likwid_LIBRARIES
  NAMES likwid
  HINTS "${LIKWID_DIR}/lib")

# Checks 'REQUIRED', 'QUIET' and versions.
include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(likwid REQUIRED_VARS likwid_INCLUDE_DIRS likwid_LIBRARIES)

================================================
FILE: CMake/packages/Findnmodl.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# Findnmodl
# -------------
#
# Find nmodl
#
# Find an installation of the nmodl source-to-source compiler
#
# Using nmodl:
#
# ::
#   set(CORENRN_NMODL_DIR "" CACHE PATH "Path to nmodl source-to-source compiler installation")
#   find_package(nmodl REQUIRED)
#   include_directories(${nmodl_INCLUDE})
#
# This module sets the following variables:
#
# ::
#
#   nmodl_FOUND      - set to true if binary, headers and python module are all found
#   nmodl_BINARY     - the nmodl binary
#   nmodl_INCLUDE    - directory containing nmodl/fast_math.hpp
#   nmodl_PYTHONPATH - directory containing the nmodl python package (nmodl/__init__.py)
# ~~~

# UNIX paths are standard, no need to write.
# Note: find_program() has no QUIET keyword; a trailing QUIET would be parsed as
# one more HINTS directory, so it is not passed here.
find_program(
  nmodl_BINARY
  NAMES nmodl${CMAKE_EXECUTABLE_SUFFIX}
  HINTS "${CORENRN_NMODL_DIR}/bin")
find_path(nmodl_INCLUDE "nmodl/fast_math.hpp" HINTS "${CORENRN_NMODL_DIR}/include")
find_path(nmodl_PYTHONPATH "nmodl/__init__.py" HINTS "${CORENRN_NMODL_DIR}/lib")

# Checks 'REQUIRED', 'QUIET' and versions.
include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(
  nmodl
  FOUND_VAR nmodl_FOUND
  REQUIRED_VARS nmodl_BINARY nmodl_INCLUDE nmodl_PYTHONPATH)

================================================
FILE: CMake/packages/Findreportinglib.cmake
================================================
# =============================================================================
# Copyright (C) 2016-2021 Blue Brain Project
#
# See top-level LICENSE file for details.
# =============================================================================

# ~~~
# Findreportinglib
# -------------
#
# Find reportinglib
#
# Find the reportinglib Blue Brain HPC utils library
#
# Using reportinglib:
#
# ::
#
#   find_package(reportinglib REQUIRED)
#   include_directories(${reportinglib_INCLUDE_DIR})
#   target_link_libraries(foo ${reportinglib_LIBRARY})
#
# This module sets the following variables:
#
# ::
#
#   reportinglib_FOUND       - set to true if the library is found
#   reportinglib_INCLUDE_DIR - directory containing reportinglib/Report.h
#   reportinglib_LIBRARY     - the reportinglib library to be linked
#   reportinglib_LIB_DIR     - directory containing reportinglib_LIBRARY
#   reportinglib_somaDump    - the somaDump executable, if found (not required)
# ~~~

# UNIX paths are standard, no need to write.
find_path(reportinglib_INCLUDE_DIR reportinglib/Report.h)
find_library(reportinglib_LIBRARY reportinglib)
# Quoted so that an empty reportinglib_LIBRARY yields an empty directory (and a
# regular "not found" result below) instead of a wrong-argument-count error.
get_filename_component(reportinglib_LIB_DIR "${reportinglib_LIBRARY}" DIRECTORY)
# somaDump is looked up in the bin directory next to the library directory.
find_program(reportinglib_somaDump somaDump "${reportinglib_LIB_DIR}/../bin")

# Checks 'REQUIRED', 'QUIET' and versions.
include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(
  reportinglib
  FOUND_VAR reportinglib_FOUND
  REQUIRED_VARS reportinglib_INCLUDE_DIR reportinglib_LIBRARY reportinglib_LIB_DIR)

================================================
FILE: CMakeLists.txt
================================================
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
cmake_minimum_required(VERSION 3.15 FATAL_ERROR)

# CoreNEURON's version jumped from 1.0 to 8.2.0 with the introduction of the NRN_VERSION_* macros
# for use in VERBATIM blocks. Starting from this version, the NEURON and CoreNEURON versions are
# locked together. A version has to be hardcoded here to handle the case that CoreNEURON is built
# standalone.
project( coreneuron VERSION 9.0.0 LANGUAGES CXX) # ~~~ # It is a bad idea having floating point versions, since macros cant handle them # We therefore, have version as an int, which is pretty much standard # ~~~ math(EXPR CORENEURON_VERSION_COMBINED "${coreneuron_VERSION_MAJOR} * 100 + ${coreneuron_VERSION_MINOR}") # ============================================================================= # CMake common project settings # ============================================================================= set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Empty or one of Debug, Release, RelWithDebInfo") if(NOT "cxx_std_17" IN_LIST CMAKE_CXX_COMPILE_FEATURES) message( FATAL_ERROR "This compiler does not fully support C++17, choose a higher version or another compiler.") endif() # ============================================================================= # Settings to enable project as submodule # ============================================================================= set(CORENEURON_PROJECT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(CORENEURON_PROJECT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(CORENEURON_AS_SUBPROJECT OFF) if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) set(CORENEURON_AS_SUBPROJECT ON) # Make these visible to the parent project (NEURON) so it can do some sanity checking. set_property(GLOBAL PROPERTY CORENRN_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) set_property(GLOBAL PROPERTY CORENRN_VERSION_MINOR ${PROJECT_VERSION_MINOR}) set_property(GLOBAL PROPERTY CORENRN_VERSION_PATCH ${PROJECT_VERSION_PATCH}) endif() if(NOT DEFINED NRN_VERSION_MAJOR OR NOT DEFINED NRN_VERSION_MINOR OR NOT DEFINED NRN_VERSION_PATCH) if(CORENEURON_AS_SUBPROJECT) set(level WARNING) else() set(level STATUS) endif() # Typically in this case CoreNEURON is being built standalone. 
In this case NRN_VERSION_* macros # resolve to the CoreNEURON version, which is supposed to be moving in lockstep with the NEURON # version. set(NRN_VERSION_MAJOR ${PROJECT_VERSION_MAJOR}) set(NRN_VERSION_MINOR ${PROJECT_VERSION_MINOR}) set(NRN_VERSION_PATCH ${PROJECT_VERSION_PATCH}) message(${level} "CoreNEURON could not determine the NEURON version, using the hardcoded " "${NRN_VERSION_MAJOR}.${NRN_VERSION_MINOR}.${NRN_VERSION_PATCH}") endif() # Regardless of whether we are being built as a submodule of NEURON, NRN_VERSION_{MAJOR,MINOR,PATCH} # are now set to the version that we should claim compatibility with when compiling translated MOD # files. Generate a header under a special `generated` prefix in the build directory, so that # -I/path/to/src -I/path/to/build/generated is safe (headers from the source prefix are copied # elsewhere under the build prefix, so there is scope for confusion) configure_file(coreneuron/config/neuron_version.hpp.in generated/coreneuron/config/neuron_version.hpp) # ============================================================================= # Include cmake modules path # ============================================================================= list(APPEND CMAKE_MODULE_PATH ${CORENEURON_PROJECT_SOURCE_DIR}/CMake ${CORENEURON_PROJECT_SOURCE_DIR}/CMake/packages ${CORENEURON_PROJECT_SOURCE_DIR}/CMake/config) # ============================================================================= # HPC Coding Conventions # ============================================================================= set(CODING_CONV_PREFIX "CORENRN") set(CORENRN_3RDPARTY_DIR "external") include(AddHpcCodingConvSubmodule) add_subdirectory(CMake/hpc-coding-conventions/cpp) # ============================================================================= # Enable sanitizer support if the CORENRN_SANITIZERS variable is set # ============================================================================= include(CMake/hpc-coding-conventions/cpp/cmake/sanitizers.cmake) 
set(CORENRN_EXTRA_CXX_FLAGS "" CACHE STRING "Add extra compile flags for CoreNEURON sources") separate_arguments(CORENRN_EXTRA_CXX_FLAGS) set(CORENRN_EXTRA_MECH_CXX_FLAGS "" CACHE STRING "Add extra compile flags for translated mechanisms") separate_arguments(CORENRN_EXTRA_MECH_CXX_FLAGS) list(APPEND CORENRN_EXTRA_CXX_FLAGS ${CORENRN_SANITIZER_COMPILER_FLAGS}) list(APPEND CORENRN_EXTRA_MECH_CXX_FLAGS ${CORENRN_SANITIZER_COMPILER_FLAGS}) list(APPEND CORENRN_EXTRA_LINK_FLAGS ${CORENRN_SANITIZER_COMPILER_FLAGS}) # ============================================================================= # Include common cmake modules # ============================================================================= include(CheckIncludeFiles) include(ReleaseDebugAutoFlags) include(CrayPortability) include(SetRpath) include(CTest) include(AddRandom123Submodule) include(GitRevision) set(CORENRN_3RDPARTY_DIR external) include(CMake/hpc-coding-conventions/cpp/cmake/3rdparty.cmake) cpp_cc_git_submodule(CLI11 BUILD PACKAGE CLI11 REQUIRED) # ============================================================================= # Build options # ============================================================================= option(CORENRN_ENABLE_OPENMP "Build the CORE NEURON with OpenMP implementation" ON) option(CORENRN_ENABLE_OPENMP_OFFLOAD "Prefer OpenMP target offload to OpenACC" ON) option(CORENRN_ENABLE_TIMEOUT "Enable nrn_timeout implementation" ON) option(CORENRN_ENABLE_REPORTING "Enable use of ReportingLib for soma reports" OFF) option(CORENRN_ENABLE_MPI "Enable MPI-based execution" ON) option(CORENRN_ENABLE_MPI_DYNAMIC "Enable dynamic MPI support" OFF) option(CORENRN_ENABLE_HOC_EXP "Enable wrapping exp with hoc_exp()" OFF) option(CORENRN_ENABLE_SPLAYTREE_QUEUING "Enable use of Splay tree for spike queuing" ON) option(CORENRN_ENABLE_NET_RECEIVE_BUFFER "Enable event buffering in net_receive function" ON) option(CORENRN_ENABLE_NMODL "Enable external nmodl source-to-source compiler" OFF) 
option(CORENRN_ENABLE_CALIPER_PROFILING "Enable Caliper instrumentation" OFF) option(CORENRN_ENABLE_LIKWID_PROFILING "Enable LIKWID instrumentation" OFF) option(CORENRN_ENABLE_CUDA_UNIFIED_MEMORY "Enable CUDA unified memory support" OFF) option(CORENRN_ENABLE_UNIT_TESTS "Enable unit tests execution" ON) option(CORENRN_ENABLE_GPU "Enable GPU support using OpenACC or OpenMP" OFF) option(CORENRN_ENABLE_SHARED "Enable shared library build" ON) option(CORENRN_ENABLE_LEGACY_UNITS "Enable legacy FARADAY, R, etc" OFF) option(CORENRN_ENABLE_PRCELLSTATE "Enable NRN_PRCELLSTATE debug feature" OFF) set(CORENRN_NMODL_DIR "" CACHE PATH "Path to nmodl source-to-source compiler installation") set(LIKWID_DIR "" CACHE PATH "Path to likwid performance analysis suite") # Older CMake versions label NVHPC as PGI, newer ones label it as NVHPC. if(${CMAKE_CXX_COMPILER_ID} STREQUAL "PGI" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL "NVHPC") set(CORENRN_HAVE_NVHPC_COMPILER ON) else() set(CORENRN_HAVE_NVHPC_COMPILER OFF) endif() set(CORENRN_ACCELERATOR_OFFLOAD "Disabled") if(CORENRN_ENABLE_GPU) # Older CMake versions than 3.15 have not been tested for GPU/CUDA/OpenACC support after # https://github.com/BlueBrain/CoreNeuron/pull/609. # Fail hard and early if we don't have the PGI/NVHPC compiler. if(NOT CORENRN_HAVE_NVHPC_COMPILER) message( FATAL_ERROR "GPU support is available via OpenACC using PGI/NVIDIA compilers." " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++" ) endif() # Set some sensible default CUDA architectures. if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES 70 80) message(STATUS "Setting default CUDA architectures to ${CMAKE_CUDA_ARCHITECTURES}") endif() # See https://gitlab.kitware.com/cmake/cmake/-/issues/23081, this should not be needed according # to the CMake documentation, but it is not clear that any version behaves as documented. 
if(DEFINED CMAKE_CUDA_HOST_COMPILER) unset(ENV{CUDAHOSTCXX}) endif() # Enable CUDA language support. enable_language(CUDA) # Prefer shared libcudart.so if(${CMAKE_VERSION} VERSION_LESS 3.17) # Ugly workaround from https://gitlab.kitware.com/cmake/cmake/-/issues/17559, remove when # possible if(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES) list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "cudart_static") list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "cudadevrt") list(APPEND CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "cudart") endif() if(CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES) list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES "cudart_static") list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES "cudadevrt") list(APPEND CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES "cudart") endif() else() # nvc++ -cuda implicitly links dynamically to libcudart.so. Setting this makes sure that CMake # does not add -lcudart_static and trigger errors due to mixed dynamic/static linkage. set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) endif() # Patch CUDA_ARCHITECTURES support into older CMake versions if(${CMAKE_VERSION} VERSION_LESS 3.18) foreach(cuda_arch ${CMAKE_CUDA_ARCHITECTURES}) string( APPEND CMAKE_CUDA_FLAGS " --generate-code=arch=compute_${cuda_arch},code=[compute_${cuda_arch},sm_${cuda_arch}]") endforeach() endif() # ~~~ # Needed for the Eigen GPU support Warning suppression (Eigen GPU-related): # 3057 : Warning on ignoring __host__ annotation in some functions # 3085 : Warning on redeclaring a __host__ function as __host__ __device__ # ~~~ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -Xcudafe --diag_suppress=3057,--diag_suppress=3085" ) if(CORENRN_ENABLE_NMODL) # NMODL supports both OpenACC and OpenMP target offload if(CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD) set(CORENRN_ACCELERATOR_OFFLOAD "OpenMP") else() set(CORENRN_ACCELERATOR_OFFLOAD "OpenACC") endif() else() # MOD2C only supports OpenACC offload set(CORENRN_ACCELERATOR_OFFLOAD "OpenACC") 
endif() endif() # ============================================================================= # Project version from git and project directories # ============================================================================= set(CN_PROJECT_VERSION ${PROJECT_VERSION}) # generate file with version number from git and nrnunits.lib file path configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coreneuron/config/config.cpp.in ${PROJECT_BINARY_DIR}/coreneuron/config/config.cpp @ONLY) # ============================================================================= # Include cmake modules after cmake options # ============================================================================= include(OpenAccHelper) # ============================================================================= # Common dependencies # ============================================================================= find_package(PythonInterp REQUIRED) find_package(Perl REQUIRED) # ============================================================================= # Common build options # ============================================================================= # build mod files for coreneuron list(APPEND CORENRN_COMPILE_DEFS CORENEURON_BUILD) set(CMAKE_REQUIRED_QUIET TRUE) check_include_files(malloc.h have_malloc_h) if(have_malloc_h) list(APPEND CORENRN_COMPILE_DEFS HAVE_MALLOC_H) endif() # ============================================================================= # Build option specific compiler flags # ============================================================================= if(CORENRN_ENABLE_NMODL) # We use Eigen for "small" matrices with thread-level parallelism handled at a higher level; tell # Eigen not to try to multithread internally list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_PARALLELIZE) endif() if(CORENRN_HAVE_NVHPC_COMPILER) # PGI with llvm code generation doesn't have necessary assembly intrinsic headers list(APPEND CORENRN_COMPILE_DEFS EIGEN_DONT_VECTORIZE=1) if(NOT CORENRN_ENABLE_GPU AND 
${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 21.11) # Random123 does not play nicely with NVHPC 21.11+'s detection of ABM features if it detects the # compiler to be PGI or NVHPC, see: https://github.com/BlueBrain/CoreNeuron/issues/724 and # https://github.com/DEShawResearch/random123/issues/6. In fact in GPU builds Random123 # (mis)detects nvc++ as nvcc because we pass the -cuda option and we therefore avoid the # problem. If GPU support is disabled, we define R123_USE_INTRIN_H=0 to avoid the problem. list(APPEND CORENRN_COMPILE_DEFS R123_USE_INTRIN_H=0) endif() # CMake versions <3.19 used to add -A when using NVHPC/PGI, which makes the compiler excessively # pedantic. See https://gitlab.kitware.com/cmake/cmake/-/issues/20997. if(CMAKE_VERSION VERSION_LESS 3.19) list(REMOVE_ITEM CMAKE_CXX17_STANDARD_COMPILE_OPTION -A) endif() endif() if(CORENRN_ENABLE_SHARED) set(COMPILE_LIBRARY_TYPE "SHARED") else() set(COMPILE_LIBRARY_TYPE "STATIC") endif() if(CORENRN_ENABLE_MPI) find_package(MPI REQUIRED) list(APPEND CORENRN_COMPILE_DEFS NRNMPI=1) # avoid linking to C++ bindings list(APPEND CORENRN_COMPILE_DEFS MPI_NO_CPPBIND=1) list(APPEND CORENRN_COMPILE_DEFS OMPI_SKIP_MPICXX=1) list(APPEND CORENRN_COMPILE_DEFS MPICH_SKIP_MPICXX=1) else() list(APPEND CORENRN_COMPILE_DEFS NRNMPI=0) list(APPEND CORENRN_COMPILE_DEFS NRN_MULTISEND=0) endif() if(CORENRN_ENABLE_OPENMP) find_package(OpenMP QUIET) if(OPENMP_FOUND) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS} ${ADDITIONAL_THREADSAFE_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS} ${ADDITIONAL_THREADSAFE_FLAGS}") endif() endif() list(APPEND CORENRN_COMPILE_DEFS LAYOUT=0) if(NOT CORENRN_ENABLE_HOC_EXP) list(APPEND CORENRN_COMPILE_DEFS DISABLE_HOC_EXP) endif() # splay tree required for net_move if(CORENRN_ENABLE_SPLAYTREE_QUEUING) list(APPEND CORENRN_COMPILE_DEFS ENABLE_SPLAYTREE_QUEUING) endif() if(NOT CORENRN_ENABLE_NET_RECEIVE_BUFFER) list(APPEND CORENRN_COMPILE_DEFS NET_RECEIVE_BUFFERING=0) 
endif() if(NOT CORENRN_ENABLE_TIMEOUT) list(APPEND CORENRN_COMPILE_DEFS DISABLE_TIMEOUT) endif() if(CORENRN_ENABLE_REPORTING) find_package(reportinglib) find_package(sonata) find_program(H5DUMP_EXECUTABLE h5dump) if(reportinglib_FOUND) list(APPEND CORENRN_COMPILE_DEFS ENABLE_BIN_REPORTS) set(ENABLE_BIN_REPORTS_TESTS ON) else() set(reportinglib_INCLUDE_DIR "") set(reportinglib_LIBRARY "") endif() if(sonata_FOUND) if(TARGET sonata::sonata_report) list(APPEND CORENRN_COMPILE_DEFS ENABLE_SONATA_REPORTS) set(ENABLE_SONATA_REPORTS_TESTS ON) else() message(SEND_ERROR "SONATA library was found but without reporting support") endif() endif() if(NOT reportinglib_FOUND AND NOT sonata_FOUND) message(SEND_ERROR "Neither reportinglib nor SONATA libraries were found") endif() include_directories(${reportinglib_INCLUDE_DIR}) include_directories(${sonatareport_INCLUDE_DIR}) endif() if(CORENRN_ENABLE_LEGACY_UNITS) set(CORENRN_USE_LEGACY_UNITS 1) else() set(CORENRN_USE_LEGACY_UNITS 0) endif() list(APPEND CORENRN_COMPILE_DEFS CORENEURON_USE_LEGACY_UNITS=${CORENRN_USE_LEGACY_UNITS}) # Propagate Legacy Units flag to backends. 
set(MOD2C_ENABLE_LEGACY_UNITS ${CORENRN_ENABLE_LEGACY_UNITS} CACHE BOOL "" FORCE) set(NMODL_ENABLE_LEGACY_UNITS ${CORENRN_ENABLE_LEGACY_UNITS} CACHE BOOL "" FORCE) if(CORENRN_ENABLE_MPI_DYNAMIC) if(NOT CORENRN_ENABLE_MPI) message(FATAL_ERROR "Cannot enable dynamic mpi without mpi") endif() list(APPEND CORENRN_COMPILE_DEFS CORENEURON_ENABLE_MPI_DYNAMIC) endif() if(CORENRN_ENABLE_PRCELLSTATE) set(CORENRN_NRN_PRCELLSTATE 1) else() set(CORENRN_NRN_PRCELLSTATE 0) endif() if(MINGW) list(APPEND CORENRN_COMPILE_DEFS MINGW) endif() # ============================================================================= # NMODL specific options # ============================================================================= if(CORENRN_ENABLE_NMODL) find_package(nmodl) if(NOT "${CORENRN_NMODL_DIR}" STREQUAL "" AND NOT nmodl_FOUND) message(FATAL_ERROR "Cannot find NMODL in ${CORENRN_NMODL_DIR}") endif() if(nmodl_FOUND) set(CORENRN_MOD2CPP_BINARY ${nmodl_BINARY}) set(CORENRN_MOD2CPP_INCLUDE ${nmodl_INCLUDE}) # path to python interface set(ENV{PYTHONPATH} "${nmodl_PYTHONPATH}:$ENV{PYTHONPATH}") set(CORENRN_NMODL_PYTHONPATH $ENV{PYTHONPATH}) else() set(NMODL_ENABLE_PYTHON_BINDINGS OFF CACHE BOOL "Disable NMODL python bindings") include(AddNmodlSubmodule) set(CORENRN_MOD2CPP_BINARY ${CMAKE_BINARY_DIR}/bin/nmodl${CMAKE_EXECUTABLE_SUFFIX}) set(CORENRN_MOD2CPP_INCLUDE ${CMAKE_BINARY_DIR}/include) set(ENV{PYTHONPATH} "$ENV{PYTHONPATH}") set(nmodl_PYTHONPATH "${CMAKE_BINARY_DIR}/lib") set(CORENRN_NMODL_PYTHONPATH "${nmodl_PYTHONPATH}:$ENV{PYTHONPATH}") set(NMODL_TARGET_TO_DEPEND nmodl) endif() include_directories(${CORENRN_MOD2CPP_INCLUDE}) # set correct arguments for nmodl for cpu/gpu target set(CORENRN_NMODL_FLAGS "" CACHE STRING "Extra NMODL options such as passes") else() include(AddMod2cSubmodule) set(NMODL_TARGET_TO_DEPEND mod2c_core) set(CORENRN_MOD2CPP_BINARY ${CMAKE_BINARY_DIR}/bin/mod2c_core${CMAKE_EXECUTABLE_SUFFIX}) set(CORENRN_MOD2CPP_INCLUDE ${CMAKE_BINARY_DIR}/include) endif() # 
============================================================================= # Profiler/Instrumentation Options # ============================================================================= if(CORENRN_ENABLE_CALIPER_PROFILING) find_package(caliper REQUIRED) list(APPEND CORENRN_COMPILE_DEFS CORENEURON_CALIPER) set(CORENRN_CALIPER_LIB caliper) endif() if(CORENRN_ENABLE_LIKWID_PROFILING) find_package(likwid REQUIRED) list(APPEND CORENRN_COMPILE_DEFS LIKWID_PERFMON) # TODO: avoid this part, probably by using some likwid CMake target include_directories(${likwid_INCLUDE_DIRS}) endif() # enable debugging code with extra logs to stdout if(CORENRN_ENABLE_DEBUG_CODE) list(APPEND CORENRN_COMPILE_DEFS CORENRN_DEBUG CHKPNTDEBUG CORENRN_DEBUG_QUEUE INTERLEAVE_DEBUG) endif() # ============================================================================= # Common CXX flags : ignore unknown pragma warnings # ============================================================================= # Do not set this when building wheels. The nrnivmodl workflow means that we do not know what # compiler will be invoked with these flags, so we have to use flags that are as generic as # possible. if(NOT DEFINED NRN_WHEEL_BUILD OR NOT NRN_WHEEL_BUILD) list(APPEND CORENRN_EXTRA_CXX_FLAGS "${IGNORE_UNKNOWN_PRAGMA_FLAGS}") endif() # Add the main source directory add_subdirectory(coreneuron) # Extract the various compiler option strings to use inside nrnivmodl-core. 
Sets the global property # CORENRN_LIB_LINK_FLAGS, which contains the arguments that must be added to the link line for # `special` to link against `libcorenrnmech.{a,so}` include(MakefileBuildOptions) # Generate the nrnivmodl-core script and makefile using the options from MakefileBuildOptions add_subdirectory(extra) if(CORENRN_ENABLE_UNIT_TESTS) add_subdirectory(tests) endif() # ============================================================================= # Install cmake modules # ============================================================================= get_property(CORENRN_NEURON_LINK_FLAGS GLOBAL PROPERTY CORENRN_NEURON_LINK_FLAGS) configure_file(CMake/coreneuron-config.cmake.in CMake/coreneuron-config.cmake @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/CMake/coreneuron-config.cmake" DESTINATION share/cmake) install(EXPORT coreneuron DESTINATION share/cmake) if(NOT CORENEURON_AS_SUBPROJECT) # ============================================================================= # Setup Doxygen documentation # ============================================================================= find_package(Doxygen QUIET) if(DOXYGEN_FOUND) # generate Doxyfile with correct source paths configure_file(${PROJECT_SOURCE_DIR}/docs/Doxyfile.in ${PROJECT_BINARY_DIR}/Doxyfile) add_custom_target( doxygen COMMAND ${DOXYGEN_EXECUTABLE} ${PROJECT_BINARY_DIR}/Doxyfile WORKING_DIRECTORY ${PROJECT_BINARY_DIR} COMMENT "Generating API documentation with Doxygen" VERBATIM) endif() # ============================================================================= # Setup Sphinx documentation # ============================================================================= find_package(Sphinx QUIET) if(SPHINX_FOUND) set(SPHINX_SOURCE ${PROJECT_SOURCE_DIR}/docs) set(SPHINX_BUILD ${PROJECT_BINARY_DIR}/docs/) add_custom_target( sphinx COMMAND ${SPHINX_EXECUTABLE} -b html ${SPHINX_SOURCE} ${SPHINX_BUILD} WORKING_DIRECTORY ${PROJECT_BINARY_DIR} COMMENT "Generating documentation with Sphinx") endif() 
# ============================================================================= # Build full docs # ============================================================================= if(DOXYGEN_FOUND AND SPHINX_FOUND) add_custom_target( docs COMMAND ${CMAKE_COMMAND} --build ${PROJECT_BINARY_DIR} --target doxygen COMMAND ${CMAKE_COMMAND} --build ${PROJECT_BINARY_DIR} --target sphinx COMMENT "Generating full documentation") else() add_custom_target( docs VERBATIM COMMAND echo "Please install docs requirements (see docs/README.md)!" COMMENT "Documentation generation not possible!") endif() endif() # ============================================================================= # Build status # ============================================================================= message(STATUS "") message(STATUS "Configured CoreNEURON ${PROJECT_VERSION}") message(STATUS "") message(STATUS "You can now build CoreNEURON using:") message(STATUS " cmake --build . --parallel 8 [--target TARGET]") message(STATUS "You might want to adjust the number of parallel build jobs for your system.") message(STATUS "Some non-default targets you might want to build:") message(STATUS "--------------------+--------------------------------------------------------") message(STATUS " Target | Description") message(STATUS "--------------------+--------------------------------------------------------") message(STATUS "install | Will install CoreNEURON to: ${CMAKE_INSTALL_PREFIX}") message(STATUS "docs | Build full docs. 
Calls targets: doxygen, sphinx") message(STATUS "--------------------+--------------------------------------------------------") message(STATUS " Build option | Status") message(STATUS "--------------------+--------------------------------------------------------") message(STATUS "CXX COMPILER | ${CMAKE_CXX_COMPILER}") message(STATUS "COMPILE FLAGS | ${CORENRN_CXX_FLAGS}") message(STATUS "Build Type | ${COMPILE_LIBRARY_TYPE}") message(STATUS "MPI | ${CORENRN_ENABLE_MPI}") if(CORENRN_ENABLE_MPI) message(STATUS " DYNAMIC | ${CORENRN_ENABLE_MPI_DYNAMIC}") if(CORENRN_ENABLE_MPI_DYNAMIC AND NRN_MPI_LIBNAME_LIST) # ~~~ # for dynamic mpi, rely on neuron for list of libraries to build # this is to avoid cmake code duplication on the coreneuron side # ~~~ list(LENGTH NRN_MPI_LIBNAME_LIST _num_mpi) math(EXPR num_mpi "${_num_mpi} - 1") foreach(val RANGE ${num_mpi}) list(GET NRN_MPI_LIBNAME_LIST ${val} libname) list(GET NRN_MPI_INCLUDE_LIST ${val} include) message(STATUS " LIBNAME | core${libname}") message(STATUS " INC | ${include}") endforeach(val) else() message(STATUS " INC | ${MPI_CXX_INCLUDE_PATH}") endif() endif() message(STATUS "OpenMP | ${CORENRN_ENABLE_OPENMP}") message(STATUS "Use legacy units | ${CORENRN_ENABLE_LEGACY_UNITS}") message(STATUS "NMODL | ${CORENRN_ENABLE_NMODL}") if(CORENRN_ENABLE_NMODL) message(STATUS " FLAGS | ${CORENRN_NMODL_FLAGS}") endif() message(STATUS "MOD2CPP PATH | ${CORENRN_MOD2CPP_BINARY}") message(STATUS "GPU Support | ${CORENRN_ENABLE_GPU}") if(CORENRN_ENABLE_GPU) message(STATUS " CUDA | ${CUDAToolkit_LIBRARY_DIR}") message(STATUS " Offload | ${CORENRN_ACCELERATOR_OFFLOAD}") message(STATUS " Unified Memory | ${CORENRN_ENABLE_CUDA_UNIFIED_MEMORY}") endif() message(STATUS "Auto Timeout | ${CORENRN_ENABLE_TIMEOUT}") message(STATUS "Wrap exp() | ${CORENRN_ENABLE_HOC_EXP}") message(STATUS "SplayTree Queue | ${CORENRN_ENABLE_SPLAYTREE_QUEUING}") message(STATUS "NetReceive Buffer | ${CORENRN_ENABLE_NET_RECEIVE_BUFFER}") message(STATUS "Caliper | 
${CORENRN_ENABLE_CALIPER_PROFILING}") message(STATUS "Likwid | ${CORENRN_ENABLE_LIKWID_PROFILING}") message(STATUS "Unit Tests | ${CORENRN_ENABLE_UNIT_TESTS}") message(STATUS "Reporting | ${CORENRN_ENABLE_REPORTING}") if(CORENRN_ENABLE_REPORTING) message(STATUS " sonatareport_INC | ${sonatareport_INCLUDE_DIR}") message(STATUS " sonatareport_LIB | ${sonatareport_LIBRARY}") message(STATUS " reportinglib_INC | ${reportinglib_INCLUDE_DIR}") message(STATUS " reportinglib_LIB | ${reportinglib_LIBRARY}") endif() message(STATUS "--------------+--------------------------------------------------------------") message(STATUS " See documentation : https://github.com/BlueBrain/CoreNeuron/") message(STATUS "--------------+--------------------------------------------------------------") message(STATUS "") ================================================ FILE: LICENSE.txt ================================================ Copyright (c) 2016 - 2021 Blue Brain Project/EPFL All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ :bangbang: **NOTE:** The CoreNEURON is now [integrated within NEURON](https://github.com/neuronsimulator/nrn/tree/master/src/coreneuron) simulator at the source level and hence all the latest development happens under the main GitHub project [neuronsimulator/nrn](https://github.com/neuronsimulator/nrn). To use CoreNEURON, see the latest NEURON documentation under [nrn.readthedocs.io](https://nrn.readthedocs.io/en/latest/).:bangbang: _______________________________________________________ ![CoreNEURON CI](https://github.com/BlueBrain/CoreNeuron/workflows/CoreNEURON%20CI/badge.svg) [![codecov](https://codecov.io/gh/BlueBrain/CoreNeuron/branch/master/graph/badge.svg?token=mguTdBx93p)](https://codecov.io/gh/BlueBrain/CoreNeuron) ![CoreNEURON](docs/_static/bluebrain_coreneuron.jpg) ## Citation If you would like to know more about CoreNEURON or would like to cite it, then use the following paper: * Pramod Kumbhar, Michael Hines, Jeremy Fouriaux, Aleksandr Ovcharenko, James King, Fabien Delalondre and Felix Schürmann. CoreNEURON : An Optimized Compute Engine for the NEURON Simulator ([doi.org/10.3389/fninf.2019.00063](https://doi.org/10.3389/fninf.2019.00063)) ## License * See LICENSE.txt * See [NEURON](https://github.com/neuronsimulator/nrn) ## Funding CoreNEURON is developed in a joint collaboration between the Blue Brain Project and Yale University. 
This work is supported by funding to the Blue Brain Project, a research center of the École polytechnique fédérale de Lausanne (EPFL), from the Swiss government’s ETH Board of the Swiss Federal Institutes of Technology, NIH grant number R01NS11613 (Yale University), the European Union Seventh Framework Program (FP7/2007-2013) under grant agreement n° 604102 (HBP) and the European Union’s Horizon 2020 Framework Programme for Research and Innovation under Specific Grant Agreement n° 720270 (Human Brain Project SGA1), n° 785907 (Human Brain Project SGA2) and n° 945539 (Human Brain Project SGA3). Copyright (c) 2016 - 2022 Blue Brain Project/EPFL ================================================ FILE: coreneuron/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= # Add compiler flags that should apply to all CoreNEURON targets, but which should not leak into # other included projects. add_compile_definitions(${CORENRN_COMPILE_DEFS}) add_compile_options(${CORENRN_EXTRA_CXX_FLAGS}) add_link_options(${CORENRN_EXTRA_LINK_FLAGS}) # put libraries (e.g. 
dll) in bin directory set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # ============================================================================= # gather various source files # ============================================================================= file( GLOB CORENEURON_CODE_FILES "apps/main1.cpp" "apps/corenrn_parameters.cpp" "gpu/nrn_acc_manager.cpp" "io/*.cpp" "io/reports/*.cpp" "mechanism/*.cpp" "mpi/core/nrnmpi_def_cinc.cpp" "network/*.cpp" "permute/*.cpp" "sim/*.cpp" "sim/scopmath/abort.cpp" "sim/scopmath/newton_thread.cpp" "utils/*.cpp" "utils/*/*.c" "utils/*/*.cpp") set(MPI_LIB_FILES "mpi/lib/mpispike.cpp" "mpi/lib/nrnmpi.cpp") if(CORENRN_ENABLE_MPI) # Building these requires -ldl, which is only added if MPI is enabled. list(APPEND CORENEURON_CODE_FILES "mpi/core/resolve.cpp" "mpi/core/nrnmpidec.cpp") endif() file(COPY ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include/Random123 DESTINATION ${CMAKE_BINARY_DIR}/include) list(APPEND CORENEURON_CODE_FILES ${PROJECT_BINARY_DIR}/coreneuron/config/config.cpp) set(ENGINEMECH_CODE_FILE "mechanism/mech/enginemech.cpp") # for external mod files we need to generate modl_ref function in mod_func.c set(MODFUNC_PERL_SCRIPT "mechanism/mech/mod_func.c.pl") set(NMODL_UNITS_FILE "${CMAKE_BINARY_DIR}/share/mod2c/nrnunits.lib") # ============================================================================= # Copy files that are required by nrnivmodl-core to the build tree at build time. 
# ============================================================================= cpp_cc_build_time_copy( INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${MODFUNC_PERL_SCRIPT}" OUTPUT "${CMAKE_BINARY_DIR}/share/coreneuron/mod_func.c.pl" NO_TARGET) cpp_cc_build_time_copy( INPUT "${CMAKE_CURRENT_SOURCE_DIR}/${ENGINEMECH_CODE_FILE}" OUTPUT "${CMAKE_BINARY_DIR}/share/coreneuron/enginemech.cpp" NO_TARGET) set(nrnivmodl_core_dependencies "${CMAKE_BINARY_DIR}/share/coreneuron/mod_func.c.pl" "${CMAKE_BINARY_DIR}/share/coreneuron/enginemech.cpp") # Set up build rules that copy builtin mod files from # {source}/coreneuron/mechanism/mech/modfile/*.mod to {build_dir}/share/modfile/ file(GLOB builtin_modfiles "${CORENEURON_PROJECT_SOURCE_DIR}/coreneuron/mechanism/mech/modfile/*.mod") foreach(builtin_modfile ${builtin_modfiles}) # Construct the path in the build directory. get_filename_component(builtin_modfile_name "${builtin_modfile}" NAME) set(modfile_build_path "${CMAKE_BINARY_DIR}/share/modfile/${builtin_modfile_name}") # Create a build rule to copy the modfile there. cpp_cc_build_time_copy( INPUT "${builtin_modfile}" OUTPUT "${modfile_build_path}" NO_TARGET) list(APPEND nrnivmodl_core_dependencies "${modfile_build_path}") endforeach() add_custom_target(coreneuron-copy-nrnivmodl-core-dependencies ALL DEPENDS ${nrnivmodl_core_dependencies}) # Store the build-tree modfile paths in a cache variable; these are an implicit dependency of # nrnivmodl-core. 
set(CORENEURON_BUILTIN_MODFILES
    "${nrnivmodl_core_dependencies}"
    CACHE STRING "List of builtin modfiles that nrnivmodl-core implicitly depends on" FORCE)

# =============================================================================
# coreneuron GPU library
# =============================================================================
if(CORENRN_ENABLE_GPU)
  # ~~~
  # artificial cells and some other cpp files (using Random123) should be compiled
  # without OpenACC to avoid use of GPU Random123 streams
  # OL210813: this shouldn't be needed anymore, but it may have a small performance benefit
  #
  # NOTE(review): the repository tree contains io/core2nrn_data_return.cpp but no
  # io/corenrn_data_return.cpp, so that entry below looks stale and the property is
  # presumably never applied to any compiled file. Confirm whether it should be renamed or
  # dropped; it is left unchanged here because renaming would change how that file is compiled.
  # ~~~
  set(OPENACC_EXCLUDED_FILES
      ${CMAKE_CURRENT_BINARY_DIR}/netstim.cpp
      ${CMAKE_CURRENT_BINARY_DIR}/netstim_inhpoisson.cpp
      ${CMAKE_CURRENT_BINARY_DIR}/pattern.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/io/nrn_setup.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/io/setup_fornetcon.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/io/corenrn_data_return.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/io/global_vars.cpp)

  set_source_files_properties(${OPENACC_EXCLUDED_FILES} PROPERTIES COMPILE_FLAGS
                                                                   "-DDISABLE_OPENACC")

  # Only compile the explicit CUDA implementation of the Hines solver in GPU builds. Because of
  # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972
  # this cannot be included in the same shared library as the rest of the OpenACC code.
  set(CORENEURON_CUDA_FILES ${CMAKE_CURRENT_SOURCE_DIR}/permute/cellorder.cu)

  # Eigen functions cannot be called directly from OpenACC regions, but Eigen is sort-of compatible
  # with being compiled as CUDA code. Because of
  # https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972
  # this has to mean `nvc++ -cuda` rather than `nvcc`. We explicitly instantiate Eigen functions for
  # different matrix sizes in partial_piv_lu.cpp (with CUDA attributes but without OpenACC or OpenMP
  # annotations) and dispatch to these from a wrapper in partial_piv_lu.h that does have
  # OpenACC/OpenMP annotations.
  if(CORENRN_ENABLE_NMODL AND EXISTS ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp)
    list(APPEND CORENEURON_CODE_FILES ${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp)
    if(CORENRN_ENABLE_GPU
       AND CORENRN_HAVE_NVHPC_COMPILER
       AND CMAKE_BUILD_TYPE STREQUAL "Debug")
      # In this case OpenAccHelper.cmake passes -gpu=debug, which makes these Eigen functions
      # extremely slow. Downgrade that to -gpu=lineinfo for this file.
      set_source_files_properties(${CORENRN_MOD2CPP_INCLUDE}/partial_piv_lu/partial_piv_lu.cpp
                                  PROPERTIES COMPILE_FLAGS "-gpu=lineinfo,nodebug -O1")
    endif()
  endif()
endif()

# =============================================================================
# create libraries
# =============================================================================

# name of coreneuron mpi objects or dynamic library
set(CORENRN_MPI_LIB_NAME
    "corenrn_mpi"
    CACHE INTERNAL "")

# for non-dynamic mpi mode just build object files
if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC)
  add_library(${CORENRN_MPI_LIB_NAME} OBJECT ${MPI_LIB_FILES})
  target_include_directories(
    ${CORENRN_MPI_LIB_NAME} PRIVATE ${MPI_INCLUDE_PATH} ${CORENEURON_PROJECT_SOURCE_DIR}
                                    ${CORENEURON_PROJECT_BINARY_DIR}/generated)
  target_link_libraries(${CORENRN_MPI_LIB_NAME} ${CORENRN_CALIPER_LIB})
  set_property(TARGET ${CORENRN_MPI_LIB_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
  # The object files of the OBJECT library above are added to the sources of coreneuron-core
  # below; TARGET_OBJECTS is the generator expression that expands to them.
  set(CORENRN_MPI_OBJ $<TARGET_OBJECTS:${CORENRN_MPI_LIB_NAME}>)
endif()

# Library containing the bulk of the non-mechanism CoreNEURON code. This is always created and
# installed as a static library, and then the nrnivmodl-core workflow extracts the object files from
# it and does one of the following:
#
# * shared build: creates libcorenrnmech.so from these objects plus those from the translated MOD
#   files
# * static build: creates a (temporary, does not get installed) libcorenrnmech.a from these objects
#   plus those from the translated MOD files, then statically links that into special-core
#   (nrniv-core)
#
# This scheme means that both core and mechanism .o files are linked in a single step, which is
# important for GPU linking. It does, however, mean that the core code is installed twice, once in
# libcoreneuron-core.a and once in libcorenrnmech.so (shared) or nrniv-core (static). In a GPU
# build, libcoreneuron-cuda.{a,so} is also linked to provide the CUDA implementation of the Hines
# solver. This cannot be included in coreneuron-core because of this issue:
# https://forums.developer.nvidia.com/t/cannot-dynamically-load-a-shared-library-containing-both-openacc-and-cuda-code/210972
add_library(coreneuron-core STATIC ${CORENEURON_CODE_FILES} ${CORENRN_MPI_OBJ})
if(CORENRN_ENABLE_GPU)
  set(coreneuron_cuda_target coreneuron-cuda)
  add_library(coreneuron-cuda ${COMPILE_LIBRARY_TYPE} ${CORENEURON_CUDA_FILES})
  target_link_libraries(coreneuron-core PUBLIC coreneuron-cuda)
endif()

# ${coreneuron_cuda_target} is only set in GPU builds, so in CPU builds this loop visits
# coreneuron-core alone.
foreach(target coreneuron-core ${coreneuron_cuda_target})
  target_include_directories(${target} PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR}
                                               ${CORENEURON_PROJECT_BINARY_DIR}/generated)
endforeach()

# we can link to MPI libraries in non-dynamic-mpi build
if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC)
  target_link_libraries(coreneuron-core PUBLIC ${MPI_CXX_LIBRARIES})
endif()

# ~~~
# main coreneuron library needs to be linked to libdl.so
# only in case of dynamic mpi build. But on old systems
# like centos7, we saw the mpich library require an explicit
# link to libdl.so. See
#   https://github.com/neuronsimulator/nrn-build-ci/pull/51
# ~~~
target_link_libraries(coreneuron-core PUBLIC ${CMAKE_DL_LIBS})

# this is where we handle dynamic mpi library build
if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC)
  # store mpi library targets that will be built
  list(APPEND corenrn_mpi_targets "")

  # ~~~
  # if coreneuron is built as a submodule of neuron then check if NEURON has created
  # list of libraries that needs to be built. We use neuron cmake variables here because
  # we don't need to duplicate CMake code into coreneuron (we want to have unified cmake
  # project soon). In the absence of neuron just build a single library libcorenrn_mpi.
  # This is mostly used for the testing.
  # ~~~
  if(NOT CORENEURON_AS_SUBPROJECT)
    add_library(${CORENRN_MPI_LIB_NAME} SHARED ${MPI_LIB_FILES})
    target_link_libraries(${CORENRN_MPI_LIB_NAME} ${MPI_CXX_LIBRARIES})
    target_include_directories(
      ${CORENRN_MPI_LIB_NAME} PRIVATE ${MPI_INCLUDE_PATH} ${CORENEURON_PROJECT_SOURCE_DIR}
                                      ${CORENEURON_PROJECT_BINARY_DIR}/generated)
    set_property(TARGET ${CORENRN_MPI_LIB_NAME} PROPERTY POSITION_INDEPENDENT_CODE ON)
    list(APPEND corenrn_mpi_targets ${CORENRN_MPI_LIB_NAME})
  else()
    # ~~~
    # from neuron we know how many different libraries need to be built, their names and the
    # include paths to be used for building shared libraries. Iterate through those
    # and build a separate library for each MPI distribution. For example, following
    # libraries are created:
    #  - libcorenrn_mpich.so
    #  - libcorenrn_ompi.so
    #  - libcorenrn_mpt.so
    # ~~~
    list(LENGTH NRN_MPI_LIBNAME_LIST _num_mpi)
    # foreach(RANGE n) iterates 0..n inclusive, hence the "- 1"
    math(EXPR num_mpi "${_num_mpi} - 1")
    foreach(val RANGE ${num_mpi})
      list(GET NRN_MPI_INCLUDE_LIST ${val} include)
      list(GET NRN_MPI_LIBNAME_LIST ${val} libname)

      add_library(core${libname}_lib SHARED ${MPI_LIB_FILES})
      target_link_libraries(core${libname}_lib ${CORENRN_CALIPER_LIB})
      target_include_directories(
        core${libname}_lib
        PUBLIC ${include}
        PRIVATE ${CORENEURON_PROJECT_SOURCE_DIR} ${CORENEURON_PROJECT_BINARY_DIR}/generated)

      # ~~~
      # TODO: somehow mingw requires explicit linking. This needs to be verified
      # when we will test coreneuron on windows.
      # ~~~
      if(MINGW) # type msmpi only
        add_dependencies(core${libname}_lib coreneuron-core)
        target_link_libraries(core${libname}_lib ${MPI_C_LIBRARIES} coreneuron-core)
      endif()
      set_property(TARGET core${libname}_lib PROPERTY OUTPUT_NAME core${libname})
      list(APPEND corenrn_mpi_targets "core${libname}_lib")
    endforeach(val)
  endif()

  set_target_properties(
    ${corenrn_mpi_targets}
    PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
               LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib
               POSITION_INDEPENDENT_CODE ON)
  install(TARGETS ${corenrn_mpi_targets} DESTINATION lib)
endif()

# Suppress some compiler warnings.
target_compile_options(coreneuron-core PRIVATE ${CORENEURON_CXX_WARNING_SUPPRESSIONS})
target_link_libraries(coreneuron-core PUBLIC ${reportinglib_LIBRARY} ${sonatareport_LIBRARY}
                                             ${CORENRN_CALIPER_LIB} ${likwid_LIBRARIES})

# TODO: fix adding a dependency of coreneuron-core on CLI11::CLI11 when CLI11 is a submodule. Right
# now this doesn't work because the CLI11 targets are not exported/installed but coreneuron-core is.
get_target_property(CLI11_HEADER_DIRECTORY CLI11::CLI11 INTERFACE_INCLUDE_DIRECTORIES) target_include_directories( coreneuron-core SYSTEM PRIVATE ${CLI11_HEADER_DIRECTORY} ${CORENEURON_PROJECT_SOURCE_DIR}/external/Random123/include) # See: https://en.cppreference.com/w/cpp/filesystem#Notes if(CMAKE_CXX_COMPILER_IS_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) target_link_libraries(coreneuron-core PUBLIC stdc++fs) endif() if(CORENRN_ENABLE_GPU) # nrnran123.cpp uses Boost.Pool in GPU builds if it's available. find_package(Boost QUIET) if(Boost_FOUND) message(STATUS "Boost found, enabling use of memory pools for Random123...") target_include_directories(coreneuron-core SYSTEM PRIVATE ${Boost_INCLUDE_DIRS}) target_compile_definitions(coreneuron-core PRIVATE CORENEURON_USE_BOOST_POOL) endif() endif() set_target_properties( coreneuron-core ${coreneuron_cuda_target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib POSITION_INDEPENDENT_CODE ${CORENRN_ENABLE_SHARED}) cpp_cc_configure_sanitizers(TARGET coreneuron-core ${coreneuron_cuda_target} ${corenrn_mpi_targets}) # ============================================================================= # create special-core with halfgap.mod for tests # ============================================================================= set(modfile_directory "${CORENEURON_PROJECT_SOURCE_DIR}/tests/integration/ring_gap/mod files") file(GLOB modfiles "${modfile_directory}/*.mod") # We have to link things like unit tests against this because some "core" .cpp files refer to # symbols in the translated versions of default .mod files set(nrniv_core_prefix "${CMAKE_BINARY_DIR}/bin/${CMAKE_SYSTEM_PROCESSOR}") set(corenrn_mech_library "${nrniv_core_prefix}/${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_PREFIX}corenrnmech${CMAKE_${COMPILE_LIBRARY_TYPE}_LIBRARY_SUFFIX}" ) set(output_binaries "${nrniv_core_prefix}/special-core" "${corenrn_mech_library}") add_custom_command( OUTPUT 
${output_binaries} DEPENDS coreneuron-core ${NMODL_TARGET_TO_DEPEND} ${modfiles} ${CORENEURON_BUILTIN_MODFILES} COMMAND ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core -b ${COMPILE_LIBRARY_TYPE} -m ${CORENRN_MOD2CPP_BINARY} -p 4 "${modfile_directory}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/bin COMMENT "Running nrnivmodl-core with halfgap.mod") add_custom_target(nrniv-core ALL DEPENDS ${output_binaries}) # Build a target representing the libcorenrnmech.so that is produced under bin/x86_64, which # executables such as the unit tests must link against add_library(builtin-libcorenrnmech SHARED IMPORTED) add_dependencies(builtin-libcorenrnmech nrniv-core) set_target_properties(builtin-libcorenrnmech PROPERTIES IMPORTED_LOCATION "${corenrn_mech_library}") if(CORENRN_ENABLE_GPU) separate_arguments(CORENRN_ACC_FLAGS UNIX_COMMAND "${NVHPC_ACC_COMP_FLAGS}") target_compile_options(coreneuron-core PRIVATE ${CORENRN_ACC_FLAGS}) endif() # Create an extra target for use by NEURON when CoreNEURON is being built as a submodule. NEURON # tests will depend on this, so it must in turn depend on everything that is needed to run nrnivmodl # -coreneuron. add_custom_target(coreneuron-for-tests) add_dependencies(coreneuron-for-tests coreneuron-core ${NMODL_TARGET_TO_DEPEND}) # Create an extra target for internal use that unit tests and so on can depend on. # ${corenrn_mech_library} is libcorenrnmech.{a,so}, which contains both the compiled default # mechanisms and the content of libcoreneuron-core.a. add_library(coreneuron-all INTERFACE) target_link_libraries(coreneuron-all INTERFACE builtin-libcorenrnmech) # Also copy the dependencies of libcoreneuron-core as interface dependencies of this new target # (example: ${corenrn_mech_library} will probably depend on MPI, so when the unit tests link against # ${corenrn_mech_library} they need to know to link against MPI too). 
get_target_property(coreneuron_core_deps coreneuron-core LINK_LIBRARIES)
# get_target_property() yields a value that evaluates to false (<var>-NOTFOUND) when the property
# is unset, in which case there is nothing to forward.
if(coreneuron_core_deps)
  foreach(dep ${coreneuron_core_deps})
    target_link_libraries(coreneuron-all INTERFACE ${dep})
  endforeach()
endif()

# Make headers avail to build tree
configure_file(engine.h.in ${CMAKE_BINARY_DIR}/include/coreneuron/engine.h @ONLY)

file(
  GLOB_RECURSE main_headers
  RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
  *.h *.hpp)

configure_file("${CORENEURON_PROJECT_BINARY_DIR}/generated/coreneuron/config/neuron_version.hpp"
               "${CMAKE_BINARY_DIR}/include/coreneuron/config/neuron_version.hpp" COPYONLY)
# Mirror every header (paths relative to this directory) into <build>/include/coreneuron/.
foreach(header ${main_headers})
  configure_file("${header}" "${CMAKE_BINARY_DIR}/include/coreneuron/${header}" COPYONLY)
endforeach()

# profiler_interface.h is additionally exposed under include/coreneuron/nrniv/
configure_file("utils/profile/profiler_interface.h"
               ${CMAKE_BINARY_DIR}/include/coreneuron/nrniv/profiler_interface.h COPYONLY)

# main program required for building special-core
file(COPY apps/coreneuron.cpp DESTINATION ${CMAKE_BINARY_DIR}/share/coreneuron)

# =============================================================================
# Install main targets
# =============================================================================

# coreneuron main libraries
install(
  TARGETS coreneuron-core ${coreneuron_cuda_target}
  EXPORT coreneuron
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  INCLUDES
  DESTINATION $<INSTALL_INTERFACE:include>)

# headers and some standalone code files for nrnivmodl-core
install(
  DIRECTORY ${CMAKE_BINARY_DIR}/include/coreneuron
  DESTINATION include/
  FILES_MATCHING
  PATTERN "*.h*"
  PATTERN "*.ipp")
install(FILES ${MODFUNC_PERL_SCRIPT} ${ENGINEMECH_CODE_FILE} DESTINATION share/coreneuron)

# copy mod2c/nmodl for nrnivmodl-core
install(PROGRAMS ${CORENRN_MOD2CPP_BINARY} DESTINATION bin)

if(NOT CORENRN_ENABLE_NMODL)
  install(FILES ${NMODL_UNITS_FILE} DESTINATION share/mod2c)
endif()

# install nrniv-core app
install(
  PROGRAMS ${CMAKE_BINARY_DIR}/bin/${CMAKE_HOST_SYSTEM_PROCESSOR}/special-core
  DESTINATION bin
  RENAME nrniv-core)
install(FILES apps/coreneuron.cpp DESTINATION share/coreneuron)

# install mechanism library in shared library builds, if we're linking statically then there is no
# need
if(CORENRN_ENABLE_SHARED)
  install(FILES ${corenrn_mech_library} DESTINATION lib)
endif()

# install random123 and nmodl headers
install(DIRECTORY ${CMAKE_BINARY_DIR}/include/ DESTINATION include)

# install mod files
install(DIRECTORY ${CMAKE_BINARY_DIR}/share/modfile DESTINATION share)

================================================
FILE: coreneuron/apps/coreneuron.cpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/

#include <coreneuron/engine.h>
#include "coreneuron/utils/profile/profiler_interface.h"

/**
 * Entry point of the standalone executable built from this file.
 *
 * Brackets the call to solve_core() with profiler initialisation and finalisation and returns
 * the value solve_core() produced as the process exit status.
 */
int main(int argc, char** argv) {
    coreneuron::Instrumentor::init_profile();
    auto solve_core_result = solve_core(argc, argv);
    coreneuron::Instrumentor::finalize_profile();
    return solve_core_result;
}

================================================
FILE: coreneuron/apps/corenrn_parameters.cpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/ #include "coreneuron/apps/corenrn_parameters.hpp" #include namespace coreneuron { extern std::string cnrn_version(); corenrn_parameters::corenrn_parameters() : m_app{std::make_unique("CoreNeuron - Optimised Simulator Engine for NEURON.")} { auto& app = *m_app; app.set_config("--read-config", "", "Read parameters from ini file", false) ->check(CLI::ExistingFile); app.add_option("--write-config", this->writeParametersFilepath, "Write parameters to this file", false); app.add_flag( "--mpi", this->mpi_enable, "Enable MPI. In order to initialize MPI environment this argument must be specified."); app.add_option("--mpi-lib", this->mpi_lib, "CoreNEURON MPI library to load for dynamic MPI support", false); app.add_flag("--gpu", this->gpu, "Activate GPU computation."); app.add_option("--dt", this->dt, "Fixed time step. The default value is set by defaults.dat or is 0.025.", true) ->check(CLI::Range(-1'000., 1e9)); app.add_option("-e, --tstop", this->tstop, "Stop Time in ms.")->check(CLI::Range(0., 1e9)); app.add_flag("--show"); app.add_set( "--verbose", this->verbose, {verbose_level::NONE, verbose_level::ERROR, verbose_level::INFO, verbose_level::DEBUG_INFO}, "Verbose level: 0 = NONE, 1 = ERROR, 2 = INFO, 3 = DEBUG. Default is INFO"); app.add_flag("--model-stats", this->model_stats, "Print number of instances of each mechanism and detailed memory stats."); auto sub_gpu = app.add_option_group("GPU", "Commands relative to GPU."); sub_gpu ->add_option("-W, --nwarp", this->nwarp, "Number of warps to execute in parallel the Hines solver. Each warp solves a " "group of cells. 
(Only used with cell permute 2)", true) ->check(CLI::Range(0, 1'000'000)); sub_gpu ->add_option("-R, --cell-permute", this->cell_interleave_permute, "Cell permutation: 0 No permutation; 1 optimise node adjacency; 2 optimize " "parent adjacency.", true) ->check(CLI::Range(0, 2)); sub_gpu->add_flag("--cuda-interface", this->cuda_interface, "Activate CUDA branch of the code."); sub_gpu->add_option("-n, --num-gpus", this->num_gpus, "Number of gpus to use per node."); auto sub_input = app.add_option_group("input", "Input dataset options."); sub_input->add_option("-d, --datpath", this->datpath, "Path containing CoreNeuron data files.") ->check(CLI::ExistingDirectory); sub_input->add_option("-f, --filesdat", this->filesdat, "Name for the distribution file.", true) ->check(CLI::ExistingFile); sub_input ->add_option("-p, --pattern", this->patternstim, "Apply patternstim using the specified spike file.") ->check(CLI::ExistingFile); sub_input ->add_option("-s, --seed", this->seed, "Initialization seed for random number generator.") ->check(CLI::Range(0, 100'000'000)); sub_input ->add_option("-v, --voltage", this->voltage, "Initial voltage used for nrn_finitialize(1, v_init). If 1000, then " "nrn_finitialize(0,...).") ->check(CLI::Range(-1e9, 1e9)); sub_input->add_option("--report-conf", this->reportfilepath, "Reports configuration file.") ->check(CLI::ExistingFile); sub_input ->add_option("--restore", this->restorepath, "Restore simulation from provided checkpoint directory.") ->check(CLI::ExistingDirectory); auto sub_parallel = app.add_option_group("parallel", "Parallel processing options."); sub_parallel->add_flag("-c, --threading", this->threading, "Parallel threads. 
The default is serial threads."); sub_parallel->add_flag("--skip-mpi-finalize", this->skip_mpi_finalize, "Do not call mpi finalize."); auto sub_spike = app.add_option_group("spike", "Spike exchange options."); sub_spike ->add_option("--ms-phases", this->ms_phases, "Number of multisend phases, 1 or 2.", true) ->check(CLI::Range(1, 2)); sub_spike ->add_option("--ms-subintervals", this->ms_subint, "Number of multisend subintervals, 1 or 2.", true) ->check(CLI::Range(1, 2)); sub_spike->add_flag("--multisend", this->multisend, "Use Multisend spike exchange instead of Allgather."); sub_spike ->add_option("--spkcompress", this->spkcompress, "Spike compression. Up to ARG are exchanged during MPI_Allgather.", true) ->check(CLI::Range(0, 100'000)); sub_spike->add_flag("--binqueue", this->binqueue, "Use bin queue."); auto sub_config = app.add_option_group("config", "Config options."); sub_config->add_option("-b, --spikebuf", this->spikebuf, "Spike buffer size.", true) ->check(CLI::Range(0, 2'000'000'000)); sub_config ->add_option("-g, --prcellgid", this->prcellgid, "Output prcellstate information for the gid NUMBER.") ->check(CLI::Range(-1, 2'000'000'000)); sub_config->add_option("-k, --forwardskip", this->forwardskip, "Forwardskip to TIME") ->check(CLI::Range(0., 1e9)); sub_config ->add_option( "-l, --celsius", this->celsius, "Temperature in degC. 
The default value is set in defaults.dat or else is 34.0.", true) ->check(CLI::Range(-1000., 1000.)); sub_config ->add_option("--mindelay", this->mindelay, "Maximum integration interval (likely reduced by minimum NetCon delay).", true) ->check(CLI::Range(0., 1e9)); sub_config ->add_option("--report-buffer-size", this->report_buff_size, "Size in MB of the report buffer.") ->check(CLI::Range(1, 128)); auto sub_output = app.add_option_group("output", "Output configuration."); sub_output->add_option("-i, --dt_io", this->dt_io, "Dt of I/O.", true) ->check(CLI::Range(-1000., 1e9)); sub_output->add_option("-o, --outpath", this->outpath, "Path to place output data files.", true); sub_output->add_option("--checkpoint", this->checkpointpath, "Enable checkpoint and specify directory to store related files."); app.add_flag("-v, --version", this->show_version, "Show version information and quit."); CLI::retire_option(app, "--show"); } // Implementation in .cpp file where CLI types are complete. corenrn_parameters::~corenrn_parameters() = default; std::string corenrn_parameters::config_to_str(bool default_also, bool write_description) const { return m_app->config_to_str(default_also, write_description); } void corenrn_parameters::reset() { static_cast(*this) = corenrn_parameters_data{}; m_app->clear(); } void corenrn_parameters::parse(int argc, char** argv) { try { m_app->parse(argc, argv); if (verbose == verbose_level::NONE) { nrn_nobanner_ = 1; } } catch (const CLI::ExtrasError& e) { // in case of parsing errors, show message with exception std::cerr << "CLI parsing error, see nrniv-core --help for more information. \n" << std::endl; m_app->exit(e); throw e; } catch (const CLI::ParseError& e) { // use --help is also ParseError; in this case exit by showing all options m_app->exit(e); exit(0); } #ifndef CORENEURON_ENABLE_GPU if (gpu) { std::cerr << "Error: GPU support was not enabled at build time but GPU execution was requested." 
<< std::endl; exit(42); } #endif // is user has asked for version info, print it and exit if (show_version) { std::cout << "CoreNEURON Version : " << cnrn_version() << std::endl; exit(0); } }; std::ostream& operator<<(std::ostream& os, const corenrn_parameters& corenrn_param) { os << "GENERAL PARAMETERS" << std::endl << "--mpi=" << (corenrn_param.mpi_enable ? "true" : "false") << std::endl << "--mpi-lib=" << corenrn_param.mpi_lib << std::endl << "--gpu=" << (corenrn_param.gpu ? "true" : "false") << std::endl << "--dt=" << corenrn_param.dt << std::endl << "--tstop=" << corenrn_param.tstop << std::endl << std::endl << "GPU" << std::endl << "--nwarp=" << corenrn_param.nwarp << std::endl << "--cell-permute=" << corenrn_param.cell_interleave_permute << std::endl << "--cuda-interface=" << (corenrn_param.cuda_interface ? "true" : "false") << std::endl << std::endl << "INPUT PARAMETERS" << std::endl << "--voltage=" << corenrn_param.voltage << std::endl << "--seed=" << corenrn_param.seed << std::endl << "--datpath=" << corenrn_param.datpath << std::endl << "--filesdat=" << corenrn_param.filesdat << std::endl << "--pattern=" << corenrn_param.patternstim << std::endl << "--report-conf=" << corenrn_param.reportfilepath << std::endl << std::left << std::setw(15) << "--restore=" << corenrn_param.restorepath << std::endl << std::endl << "PARALLEL COMPUTATION PARAMETERS" << std::endl << "--threading=" << (corenrn_param.threading ? "true" : "false") << std::endl << "--skip_mpi_finalize=" << (corenrn_param.skip_mpi_finalize ? "true" : "false") << std::endl << std::endl << "SPIKE EXCHANGE" << std::endl << "--ms_phases=" << corenrn_param.ms_phases << std::endl << "--ms_subintervals=" << corenrn_param.ms_subint << std::endl << "--multisend=" << (corenrn_param.multisend ? "true" : "false") << std::endl << "--spk_compress=" << corenrn_param.spkcompress << std::endl << "--binqueue=" << (corenrn_param.binqueue ? 
"true" : "false") << std::endl << std::endl << "CONFIGURATION" << std::endl << "--spikebuf=" << corenrn_param.spikebuf << std::endl << "--prcellgid=" << corenrn_param.prcellgid << std::endl << "--forwardskip=" << corenrn_param.forwardskip << std::endl << "--celsius=" << corenrn_param.celsius << std::endl << "--mindelay=" << corenrn_param.mindelay << std::endl << "--report-buffer-size=" << corenrn_param.report_buff_size << std::endl << std::endl << "OUTPUT PARAMETERS" << std::endl << "--dt_io=" << corenrn_param.dt_io << std::endl << "--outpath=" << corenrn_param.outpath << std::endl << "--checkpoint=" << corenrn_param.checkpointpath << std::endl; return os; } corenrn_parameters corenrn_param; int nrn_nobanner_{0}; } // namespace coreneuron ================================================ FILE: coreneuron/apps/corenrn_parameters.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include #include #include /** * \class corenrn_parameters * \brief Parses and contains Command Line parameters for Core Neuron * * This structure contains all the parameters that CoreNeuron fetches * from the Command Line. It uses the CLI11 libraries to parse these parameters * and saves them in an internal public structure. Each parameter can be * accessed or written freely. By default the constructor instantiates a * CLI11 object and initializes it for CoreNeuron use. * This object is freely accessible from any point of the program. * An ostream method is also provided to print out all the parameters that * CLI11 parse. * Please keep in mind that, due to the nature of the subcommands in CLI11, * the command line parameters for subcategories NEED to be come before the relative * parameter. e.g. 
--mpi --gpu gpu --nwarp * Also single dash long options are not supported anymore (-mpi -> --mpi). */ namespace CLI { struct App; } namespace coreneuron { struct corenrn_parameters_data { enum verbose_level : std::uint32_t { NONE = 0, ERROR = 1, INFO = 2, DEBUG_INFO = 3, DEFAULT = INFO }; static constexpr int report_buff_size_default = 4; unsigned spikebuf = 100'000; /// Internal buffer used on every rank for spikes int prcellgid = -1; /// Gid of cell for prcellstate unsigned ms_phases = 2; /// Number of multisend phases, 1 or 2 unsigned ms_subint = 2; /// Number of multisend interval. 1 or 2 unsigned spkcompress = 0; /// Spike Compression unsigned cell_interleave_permute = 0; /// Cell interleaving permutation unsigned nwarp = 65536; /// Number of warps to balance for cell_interleave_permute == 2 unsigned num_gpus = 0; /// Number of gpus to use per node unsigned report_buff_size = report_buff_size_default; /// Size in MB of the report buffer. int seed = -1; /// Initialization seed for random number generator (int) bool mpi_enable = false; /// Enable MPI flag. bool skip_mpi_finalize = false; /// Skip MPI finalization bool multisend = false; /// Use Multisend spike exchange instead of Allgather. bool threading = false; /// Enable pthread/openmp bool gpu = false; /// Enable GPU computation. bool cuda_interface = false; /// Enable CUDA interface (default is the OpenACC interface). /// Branch of the code is executed through CUDA kernels instead of /// OpenACC regions. bool binqueue = false; /// Use bin queue. bool show_version = false; /// Print version and exit. 
bool model_stats = false; /// Print mechanism counts and model size after initialization verbose_level verbose{verbose_level::DEFAULT}; /// Verbosity-level double tstop = 100; /// Stop time of simulation in msec double dt = -1000.0; /// Timestep to use in msec double dt_io = 0.1; /// I/O timestep to use in msec double dt_report; /// I/O timestep to use in msec for reports double celsius = -1000.0; /// Temperature in degC. double voltage = -65.0; /// Initial voltage used for nrn_finitialize(1, v_init). double forwardskip = 0.; /// Forward skip to TIME. double mindelay = 10.; /// Maximum integration interval (likely reduced by minimum NetCon /// delay). std::string patternstim; /// Apply patternstim using the specified spike file. std::string datpath = "."; /// Directory path where .dat files std::string outpath = "."; /// Directory where spikes will be written std::string filesdat = "files.dat"; /// Name of file containing list of gids dat files read in std::string restorepath; /// Restore simulation from provided checkpoint directory. std::string reportfilepath; /// Reports configuration file. std::string checkpointpath; /// Enable checkpoint and specify directory to store related files. std::string writeParametersFilepath; /// Write parameters to this file std::string mpi_lib; /// Name of CoreNEURON MPI library to load dynamically. }; struct corenrn_parameters: corenrn_parameters_data { corenrn_parameters(); /// Constructor that initializes the CLI11 app. ~corenrn_parameters(); /// Destructor defined in .cpp where CLI11 types are complete. void parse(int argc, char* argv[]); /// Runs the CLI11_PARSE macro. /** @brief Reset all parameters to their default values. * * Unfortunately it is awkward to support `x = corenrn_parameters{}` * because `app` holds pointers to members of `corenrn_parameters`. */ void reset(); inline bool is_quiet() { return verbose == verbose_level::NONE; } /** @brief Return a string summarising the current parameter values. 
* * This forwards to the CLI11 method of the same name. Returns a string that * could be read in as a config of the current values of the App. * * @param default_also Include any defaulted arguments. * @param write_description Include option descriptions and the App description. */ std::string config_to_str(bool default_also = false, bool write_description = false) const; private: // CLI app that performs CLI parsing. std::unique_ptr avoids having to // include CLI11 headers from CoreNEURON headers, and therefore avoids // CoreNEURON having to install CLI11 when using it from a submodule. std::unique_ptr m_app; }; std::ostream& operator<<(std::ostream& os, const corenrn_parameters& corenrn_param); /// Printing method. extern corenrn_parameters corenrn_param; /// Declaring global corenrn_parameters object for this /// instance of CoreNeuron. extern int nrn_nobanner_; /// Global no banner setting } // namespace coreneuron ================================================ FILE: coreneuron/apps/main1.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ /** * @file main1.cpp * @date 26 Oct 2014 * @brief File containing main driver routine for CoreNeuron */ #include #include #include #include #include #include "coreneuron/config/config.h" #include "coreneuron/utils/randoms/nrnran123.h" #include "coreneuron/nrnconf.h" #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/mechanism/register_mech.hpp" #include "coreneuron/io/output_spikes.hpp" #include "coreneuron/io/nrn_checkpoint.hpp" #include "coreneuron/utils/memory_utils.h" #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/io/prcellstate.hpp" #include "coreneuron/utils/nrn_stats.h" #include "coreneuron/io/reports/nrnreport.hpp" #include "coreneuron/io/reports/binary_report_handler.hpp" #include "coreneuron/io/reports/report_handler.hpp" #include "coreneuron/io/reports/sonata_report_handler.hpp" #include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/network/partrans.hpp" #include "coreneuron/network/multisend.hpp" #include "coreneuron/io/nrn_setup.hpp" #include "coreneuron/io/file_utils.hpp" #include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/io/core2nrn_data_return.hpp" #include "coreneuron/utils/utils.hpp" extern "C" { const char* corenrn_version() { return coreneuron::bbcore_write_version; } // the CORENEURON_USE_LEGACY_UNITS determined by CORENRN_ENABLE_LEGACY_UNITS bool corenrn_units_use_legacy() { return CORENEURON_USE_LEGACY_UNITS; } void (*nrn2core_part2_clean_)(); /** * If "export OMP_NUM_THREADS=n" is not set then omp by default sets * the number of threads equal to the number of cores on this node. * If there are a number of mpi processes on this node as well, things * can go very slowly as there are so many more threads than cores. * Assume the NEURON users pc.nthread() is well chosen if * OMP_NUM_THREADS is not set. 
*/ void set_openmp_threads(int nthread) { #if defined(_OPENMP) if (!getenv("OMP_NUM_THREADS")) { omp_set_num_threads(nthread); } #endif } /** * Convert char* containing arguments from neuron to char* argv[] for * coreneuron command line argument parser. */ char* prepare_args(int& argc, char**& argv, int use_mpi, const char* mpi_lib, const char* arg) { // first construct all arguments as string std::string args(arg); args.insert(0, " coreneuron "); args.append(" --skip-mpi-finalize "); if (use_mpi) { args.append(" --mpi "); } // if neuron has passed name of MPI library then add it to CLI std::string corenrn_mpi_lib{mpi_lib}; if (!corenrn_mpi_lib.empty()) { args.append(" --mpi-lib "); corenrn_mpi_lib += " "; args.append(corenrn_mpi_lib); } // we can't modify string with strtok, make copy char* first = strdup(args.c_str()); const char* sep = " "; // first count the no of argument char* token = strtok(first, sep); argc = 0; while (token) { token = strtok(nullptr, sep); argc++; } free(first); // now build char*argv argv = new char*[argc]; first = strdup(args.c_str()); token = strtok(first, sep); for (int i = 0; token; i++) { argv[i] = token; token = strtok(nullptr, sep); } // return actual data to be freed return first; } } namespace coreneuron { void call_prcellstate_for_prcellgid(int prcellgid, int compute_gpu, int is_init); // bsize = 0 then per step transfer // bsize > 1 then full trajectory save into arrays. void get_nrn_trajectory_requests(int bsize) { if (nrn2core_get_trajectory_requests_) { for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread& nt = nrn_threads[tid]; int n_pr; int n_trajec; int* types; int* indices; void** vpr; double** varrays; double** pvars; // bsize is passed by reference, the return value will determine if // per step return or entire trajectory return. 
(*nrn2core_get_trajectory_requests_)( tid, bsize, n_pr, vpr, n_trajec, types, indices, pvars, varrays); delete_trajectory_requests(nt); if (n_trajec) { TrajectoryRequests* tr = new TrajectoryRequests; nt.trajec_requests = tr; tr->bsize = bsize; tr->n_pr = n_pr; tr->n_trajec = n_trajec; tr->vsize = 0; tr->vpr = vpr; tr->gather = new double*[n_trajec]; tr->varrays = varrays; tr->scatter = pvars; for (int i = 0; i < n_trajec; ++i) { tr->gather[i] = stdindex2ptr(types[i], indices[i], nt); } delete[] types; delete[] indices; } } } } void nrn_init_and_load_data(int argc, char* argv[], CheckPoints& checkPoints, bool is_mapping_needed, bool run_setup_cleanup) { #if defined(NRN_FEEXCEPT) nrn_feenableexcept(); #endif /// profiler like tau/vtune : do not measure from begining Instrumentor::stop_profile(); // memory footprint after mpi initialisation if (!corenrn_param.is_quiet()) { report_mem_usage("After MPI_Init"); } // initialise default coreneuron parameters initnrn(); // set global variables // precedence is: set by user, globals.dat, 34.0 celsius = corenrn_param.celsius; #if CORENEURON_ENABLE_GPU if (!corenrn_param.gpu && corenrn_param.cell_interleave_permute == 2) { fprintf(stderr, "compiled with CORENEURON_ENABLE_GPU does not allow the combination of " "--cell-permute=2 and " "missing --gpu\n"); exit(1); } if (!corenrn_param.gpu && corenrn_param.cuda_interface) { fprintf(stderr, "compiled with OpenACC/CUDA does not allow the combination of --cuda-interface and " "missing --gpu\n"); exit(1); } #endif // if multi-threading enabled, make sure mpi library supports it #if NRNMPI if (corenrn_param.mpi_enable && corenrn_param.threading) { nrnmpi_check_threading_support(); } #endif // full path of files.dat file std::string filesdat(corenrn_param.datpath + "/" + corenrn_param.filesdat); // read the global variable names and set their values from globals.dat set_globals(corenrn_param.datpath.c_str(), (corenrn_param.seed >= 0), corenrn_param.seed); // set global variables for 
start time, timestep and temperature if (!corenrn_embedded) { t = checkPoints.restore_time(); } if (corenrn_param.dt != -1000.) { // command line arg highest precedence dt = corenrn_param.dt; } else if (dt == -1000.) { // not on command line and no dt in globals.dat dt = 0.025; // lowest precedence } corenrn_param.dt = dt; rev_dt = (int) (1. / dt); if (corenrn_param.celsius != -1000.) { // command line arg highest precedence celsius = corenrn_param.celsius; } else if (celsius == -1000.) { // not on command line and no celsius in globals.dat celsius = 34.0; // lowest precedence } corenrn_param.celsius = celsius; // create net_cvode instance mk_netcvode(); // One part done before call to nrn_setup. Other part after. if (!corenrn_param.patternstim.empty()) { nrn_set_extra_thread0_vdata(); } if (!corenrn_param.is_quiet()) { report_mem_usage("Before nrn_setup"); } // set if need to interleave cells interleave_permute_type = corenrn_param.cell_interleave_permute; cellorder_nwarp = corenrn_param.nwarp; use_solve_interleave = corenrn_param.cell_interleave_permute; if (corenrn_param.gpu && interleave_permute_type == 0) { if (nrnmpi_myid == 0) { printf( " WARNING : GPU execution requires --cell-permute type 1 or 2. Setting it to 1.\n"); } interleave_permute_type = 1; use_solve_interleave = true; } // multisend options use_multisend_ = corenrn_param.multisend ? 1 : 0; n_multisend_interval = corenrn_param.ms_subint; use_phase2_ = (corenrn_param.ms_phases == 2) ? 1 : 0; // reading *.dat files and setting up the data structures, setting mindelay nrn_setup(filesdat.c_str(), is_mapping_needed, checkPoints, run_setup_cleanup, corenrn_param.datpath.c_str(), checkPoints.get_restore_path().c_str(), &corenrn_param.mindelay); // Allgather spike compression and bin queuing. nrn_use_bin_queue_ = corenrn_param.binqueue; int spkcompress = corenrn_param.spkcompress; nrnmpi_spike_compress(spkcompress, (spkcompress ? 
true : false), use_multisend_); if (!corenrn_param.is_quiet()) { report_mem_usage("After nrn_setup "); } // Invoke PatternStim if (!corenrn_param.patternstim.empty()) { nrn_mkPatternStim(corenrn_param.patternstim.c_str(), corenrn_param.tstop); } /// Setting the timeout nrn_set_timeout(200.); // show all configuration parameters for current run if (nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { std::cout << corenrn_param << std::endl; std::cout << " Start time (t) = " << t << std::endl << std::endl; } // allocate buffer for mpi communication mk_spikevec_buffer(corenrn_param.spikebuf); if (!corenrn_param.is_quiet()) { report_mem_usage("After mk_spikevec_buffer"); } // In direct mode there are likely trajectory record requests // to allow processing in NEURON after simulation by CoreNEURON if (corenrn_embedded) { // arg is additional vector size required (how many items will be // written to the double*) but NEURON can instead // specify that returns will be on a per time step basis. get_nrn_trajectory_requests(int((corenrn_param.tstop - t) / corenrn_param.dt) + 2); // In direct mode, CoreNEURON has exactly the behavior of // ParallelContext.psolve(tstop). Ie a sequence of such calls // without an intervening h.finitialize() continues from the end // of the previous call. I.e., all initial state, including // the event queue has been set up in NEURON. And, at the end // all final state, including the event queue will be sent back // to NEURON. Here there is some first time only // initialization and queue transfer. 
direct_mode_initialize(); clear_spike_vectors(); // PreSyn send already recorded by NEURON (*nrn2core_part2_clean_)(); } if (corenrn_param.gpu) { // Copy nrnthreads to device only after all the data are passed from NEURON and the // nrnthreads on CPU are properly set up setup_nrnthreads_on_device(nrn_threads, nrn_nthread); } if (corenrn_embedded) { // Run nrn_init of mechanisms only to allocate any extra data needed on the GPU after // nrnthreads are properly set up on the GPU allocate_data_in_mechanism_nrn_init(); } if (corenrn_param.gpu) { if (nrn_have_gaps) { nrn_partrans::copy_gap_indices_to_device(); } } // call prcellstate for prcellgid call_prcellstate_for_prcellgid(corenrn_param.prcellgid, corenrn_param.gpu, 1); } void call_prcellstate_for_prcellgid(int prcellgid, int compute_gpu, int is_init) { char prcellname[1024]; #ifdef ENABLE_CUDA const char* prprefix = "cu"; #else const char* prprefix = "acc"; #endif if (prcellgid >= 0) { if (compute_gpu) { if (is_init) sprintf(prcellname, "%s_gpu_init", prprefix); else sprintf(prcellname, "%s_gpu_t%f", prprefix, t); } else { if (is_init) strcpy(prcellname, "cpu_init"); else sprintf(prcellname, "cpu_t%f", t); } update_nrnthreads_on_host(nrn_threads, nrn_nthread); prcellstate(prcellgid, prcellname); } } /* perform forwardskip and call prcellstate for prcellgid */ void handle_forward_skip(double forwardskip, int prcellgid) { double savedt = dt; double savet = t; dt = forwardskip * 0.1; t = -1e9; dt2thread(-1.); for (int step = 0; step < 10; ++step) { nrn_fixed_step_minimal(); } if (prcellgid >= 0) { prcellstate(prcellgid, "fs"); } dt = savedt; t = savet; dt2thread(-1.); // clear spikes generated during forward skip (with negative time) clear_spike_vectors(); } std::string cnrn_version() { return version::to_string(); } static void trajectory_return() { if (nrn2core_trajectory_return_) { for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread& nt = nrn_threads[tid]; TrajectoryRequests* tr = nt.trajec_requests; if (tr && 
tr->varrays) { (*nrn2core_trajectory_return_)(tid, tr->n_pr, tr->bsize, tr->vsize, tr->vpr, nt._t); } } } } std::unique_ptr create_report_handler(const ReportConfiguration& config, const SpikesInfo& spikes_info) { std::unique_ptr report_handler; if (config.format == "Bin") { report_handler = std::make_unique(); } else if (config.format == "SONATA") { report_handler = std::make_unique(spikes_info); } else { if (nrnmpi_myid == 0) { printf(" WARNING : Report name '%s' has unknown format: '%s'.\n", config.name.data(), config.format.data()); } return nullptr; } return report_handler; } } // namespace coreneuron /// The following high-level functions are marked as "extern C" /// for compat with C, namely Neuron mod files. /// They split the previous solve_core so that intermediate init of external mechanisms can occur. /// See mech/corenrnmech.cpp for the new all-in-one solve_core (not compiled into the coreneuron /// lib since with nrnivmodl-core we have 'future' external mechanisms) using namespace coreneuron; #if NRNMPI && defined(CORENEURON_ENABLE_MPI_DYNAMIC) static void* load_dynamic_mpi(const std::string& libname) { dlerror(); void* handle = dlopen(libname.c_str(), RTLD_NOW | RTLD_GLOBAL); const char* error = dlerror(); if (error) { std::string err_msg = std::string("Could not open dynamic MPI library: ") + error + "\n"; throw std::runtime_error(err_msg); } return handle; } #endif extern "C" void mk_mech_init(int argc, char** argv) { // reset all parameters to their default values corenrn_param.reset(); // read command line parameters and parameter config files corenrn_param.parse(argc, argv); #if NRNMPI if (corenrn_param.mpi_enable) { #ifdef CORENEURON_ENABLE_MPI_DYNAMIC // coreneuron rely on neuron to detect mpi library distribution and // the name of the library itself. Make sure the library name is specified // via CLI option. 
if (corenrn_param.mpi_lib.empty()) { throw std::runtime_error( "For dynamic MPI support you must pass '--mpi-lib " "/path/libcorenrnmpi_.` argument!\n"); } // neuron can call coreneuron multiple times and hence we do not // want to initialize/load mpi library multiple times static bool mpi_lib_loaded = false; if (!mpi_lib_loaded) { auto mpi_handle = load_dynamic_mpi(corenrn_param.mpi_lib); mpi_manager().resolve_symbols(mpi_handle); mpi_lib_loaded = true; } #endif auto ret = nrnmpi_init(&argc, &argv, corenrn_param.is_quiet()); nrnmpi_numprocs = ret.numprocs; nrnmpi_myid = ret.myid; } #endif #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { init_gpu(); cnrn_target_copyin(&celsius); cnrn_target_copyin(&pi); cnrn_target_copyin(&secondorder); nrnran123_initialise_global_state_on_device(); } #endif if (!corenrn_param.writeParametersFilepath.empty()) { std::ofstream out(corenrn_param.writeParametersFilepath, std::ios::trunc); out << corenrn_param.config_to_str(false, false); out.close(); } // reads mechanism information from bbcore_mech.dat mk_mech((corenrn_param.datpath).c_str()); } extern "C" int run_solve_core(int argc, char** argv) { Instrumentor::phase_begin("main"); std::vector configs; std::vector> report_handlers; SpikesInfo spikes_info; bool reports_needs_finalize = false; if (!corenrn_param.is_quiet()) { report_mem_usage("After mk_mech"); } // Create outpath if it does not exist if (nrnmpi_myid == 0) { mkdir_p(corenrn_param.outpath.c_str()); } if (!corenrn_param.reportfilepath.empty()) { configs = create_report_configurations(corenrn_param.reportfilepath, corenrn_param.outpath, spikes_info); reports_needs_finalize = !configs.empty(); } CheckPoints checkPoints{corenrn_param.checkpointpath, corenrn_param.restorepath}; // initializationa and loading functions moved to separate { Instrumentor::phase p("load-model"); nrn_init_and_load_data(argc, argv, checkPoints, !configs.empty()); } std::string output_dir = corenrn_param.outpath; if (nrnmpi_myid == 0) { 
mkdir_p(output_dir.c_str()); } #if NRNMPI if (corenrn_param.mpi_enable) { nrnmpi_barrier(); } #endif bool compute_gpu = corenrn_param.gpu; nrn_pragma_acc(update device(celsius, secondorder, pi) if (compute_gpu)) nrn_pragma_omp(target update to(celsius, secondorder, pi) if (compute_gpu)) { double v = corenrn_param.voltage; double dt = corenrn_param.dt; double delay = corenrn_param.mindelay; double tstop = corenrn_param.tstop; if (tstop < t && nrnmpi_myid == 0) { printf("Error: Stop time (%lf) < Start time (%lf), restoring from checkpoint? \n", tstop, t); abort(); } // TODO : if some ranks are empty then restore will go in deadlock // phase (as some ranks won't have restored anything and hence return // false in checkpoint_initialize if (!corenrn_embedded && !checkPoints.initialize()) { nrn_finitialize(v != 1000., v); } if (!corenrn_param.is_quiet()) { report_mem_usage("After nrn_finitialize"); } // register all reports into reportinglib double min_report_dt = INT_MAX; for (size_t i = 0; i < configs.size(); i++) { std::unique_ptr report_handler = create_report_handler(configs[i], spikes_info); if (report_handler) { report_handler->create_report(configs[i], dt, tstop, delay); report_handlers.push_back(std::move(report_handler)); } if (configs[i].report_dt < min_report_dt) { min_report_dt = configs[i].report_dt; } } // Set the buffer size if is not the default value. 
Otherwise use report.conf on // register_report if (corenrn_param.report_buff_size != corenrn_param.report_buff_size_default) { set_report_buffer_size(corenrn_param.report_buff_size); } if (!configs.empty()) { setup_report_engine(min_report_dt, delay); configs.clear(); } // call prcellstate for prcellgid call_prcellstate_for_prcellgid(corenrn_param.prcellgid, compute_gpu, 0); // handle forwardskip if (corenrn_param.forwardskip > 0.0) { Instrumentor::phase p("handle-forward-skip"); handle_forward_skip(corenrn_param.forwardskip, corenrn_param.prcellgid); } /// Solver execution Instrumentor::start_profile(); Instrumentor::phase_begin("simulation"); BBS_netpar_solve(corenrn_param.tstop); Instrumentor::phase_end("simulation"); Instrumentor::stop_profile(); // update cpu copy of NrnThread from GPU update_nrnthreads_on_host(nrn_threads, nrn_nthread); // direct mode and full trajectory gathering on CoreNEURON, send back. if (corenrn_embedded) { trajectory_return(); } // Report global cell statistics if (!corenrn_param.is_quiet()) { report_cell_stats(); } // prcellstate after end of solver call_prcellstate_for_prcellgid(corenrn_param.prcellgid, compute_gpu, 0); } // write spike information to outpath { Instrumentor::phase p("output-spike"); output_spikes(output_dir.c_str(), spikes_info); } // copy weights back to NEURON NetCon if (nrn2core_all_weights_return_) { // first update weights from gpu update_weights_from_gpu(nrn_threads, nrn_nthread); // store weight pointers std::vector weights(nrn_nthread, nullptr); // could be one thread more (empty) than in NEURON but does not matter for (int i = 0; i < nrn_nthread; ++i) { weights[i] = nrn_threads[i].weights; } (*nrn2core_all_weights_return_)(weights); } core2nrn_data_return(); { Instrumentor::phase p("checkpoint"); checkPoints.write_checkpoint(nrn_threads, nrn_nthread); } // must be done after checkpoint (to avoid deleting events) if (reports_needs_finalize) { finalize_report(); } // cleanup threads on GPU if 
(corenrn_param.gpu) { delete_nrnthreads_on_device(nrn_threads, nrn_nthread); if (nrn_have_gaps) { nrn_partrans::delete_gap_indices_from_device(); } nrnran123_destroy_global_state_on_device(); cnrn_target_delete(&secondorder); cnrn_target_delete(&pi); cnrn_target_delete(&celsius); } // Cleaning the memory nrn_cleanup(); // tau needs to resume profile Instrumentor::start_profile(); // mpi finalize #if NRNMPI if (corenrn_param.mpi_enable && !corenrn_param.skip_mpi_finalize) { nrnmpi_finalize(); } #endif Instrumentor::phase_end("main"); return 0; } ================================================ FILE: coreneuron/config/config.cpp.in ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/config/config.h" /// Git version of the project const std::string coreneuron::version::GIT_REVISION = "@CN_GIT_REVISION@"; /// CoreNEURON version const std::string coreneuron::version::CORENEURON_VERSION = "@CN_PROJECT_VERSION@"; ================================================ FILE: coreneuron/config/config.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once /** * \dir * \brief Global project configurations * * \file * \brief Version information */ #include namespace coreneuron { /** * \brief Project version information */ struct version { /// git revision id static const std::string GIT_REVISION; /// project tagged version in the cmake static const std::string CORENEURON_VERSION; /// return version string (version + git id) as a string static std::string to_string() { return CORENEURON_VERSION + " " + GIT_REVISION; } }; } // namespace coreneuron ================================================ FILE: coreneuron/config/neuron_version.hpp.in ================================================ /* # ============================================================================= # Copyright (c) 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once // This is the CoreNEURON analogue of nrnsemanticversion.h in NEURON. Hopefully // the duplication can go away soon. #define NRN_VERSION_MAJOR @NRN_VERSION_MAJOR@ #define NRN_VERSION_MINOR @NRN_VERSION_MINOR@ #define NRN_VERSION_PATCH @NRN_VERSION_PATCH@ ================================================ FILE: coreneuron/config/version_macros.hpp ================================================ /* # ============================================================================= # Copyright (c) 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once // This is the CoreNEURON analogue of nrnversionmacros.h in NEURON. Hopefully // the duplication can go away soon. 
#include "coreneuron/config/neuron_version.hpp"

/**
 * Encode a (major, minor, patch) triple as a single integer so that versions
 * can be compared with ordinary relational operators.
 *
 * Every parameter is parenthesised so that expression arguments (for example
 * `NRN_VERSION_INT(8, 1 + 1, 0)`) are multiplied as a whole instead of being
 * split up by operator precedence.
 */
#define NRN_VERSION_INT(maj, min, pat) (10000 * (maj) + 100 * (min) + (pat))
/** Integer encoding of the version given by NRN_VERSION_{MAJOR,MINOR,PATCH}. */
#define NRN_VERSION NRN_VERSION_INT(NRN_VERSION_MAJOR, NRN_VERSION_MINOR, NRN_VERSION_PATCH)
/* Comparisons of NRN_VERSION against an explicit (maj, min, pat) triple. */
#define NRN_VERSION_EQ(maj, min, pat) (NRN_VERSION == NRN_VERSION_INT(maj, min, pat))
#define NRN_VERSION_NE(maj, min, pat) (NRN_VERSION != NRN_VERSION_INT(maj, min, pat))
#define NRN_VERSION_GT(maj, min, pat) (NRN_VERSION > NRN_VERSION_INT(maj, min, pat))
#define NRN_VERSION_LT(maj, min, pat) (NRN_VERSION < NRN_VERSION_INT(maj, min, pat))
#define NRN_VERSION_GTEQ(maj, min, pat) (NRN_VERSION >= NRN_VERSION_INT(maj, min, pat))
#define NRN_VERSION_LTEQ(maj, min, pat) (NRN_VERSION <= NRN_VERSION_INT(maj, min, pat))

// 8.2.0 is significant because all versions >=8.2.0 should contain definitions
// of these macros, and doing #ifndef NRN_VERSION_GTEQ_8_2_0 is a more
// descriptive way of writing #if defined(NRN_VERSION_GTEQ). Testing for 8.2.0
// is likely to be a common pattern when adapting MOD file VERBATIM blocks for
// C++ compatibility.
#if NRN_VERSION_GTEQ(8, 2, 0)
#define NRN_VERSION_GTEQ_8_2_0
#endif

================================================
FILE: coreneuron/coreneuron.hpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/

#pragma once

/***
 * Includes all headers required to communicate and run all methods
 * described in CoreNEURON, neurox, and mod2c C-generated mechanisms
 * functions.
**/ #include #include #include #include #include #include #include "coreneuron/utils/randoms/nrnran123.h" //Random Number Generator #include "coreneuron/sim/scopmath/newton_struct.h" //Newton Struct #include "coreneuron/membrane_definitions.h" //static definitions #include "coreneuron/mechanism/mechanism.hpp" //Memb_list and mechs info #include "coreneuron/utils/memory.h" //Memory alignments and padding #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mechanism/mech_mapping.hpp" namespace coreneuron { // from nrnoc/capac.c extern void nrn_init_capacitance(NrnThread*, Memb_list*, int); extern void nrn_cur_capacitance(NrnThread* _nt, Memb_list* ml, int type); extern void nrn_alloc_capacitance(double* data, Datum* pdata, int type); // from nrnoc/eion.c extern void nrn_init_ion(NrnThread*, Memb_list*, int); extern void nrn_cur_ion(NrnThread* _nt, Memb_list* ml, int type); extern void nrn_alloc_ion(double* data, Datum* pdata, int type); extern void second_order_cur(NrnThread* _nt, int secondorder); using DependencyTable = std::vector>; /** * A class representing the CoreNEURON state, holding pointers to the various data structures * * The pointers to "global" data such as the NrnThread, Memb_list and Memb_func data structures * are managed here. they logically share their lifetime and runtime scope with instances of * this class. */ class CoreNeuron { /** * map if mech is a point process * In the future only a field of Mechanism class */ std::vector pnt_map; /* so prop_free can know its a point mech*/ /** Vector mapping the types (IDs) of different mechanisms of mod files between NEURON and * CoreNEURON */ std::vector different_mechanism_type; /** * dependency helper filled by calls to hoc_register_dparam_semantics * used when nrn_mech_depend is called * vector-of-vector DS. First idx is the mech, second idx is the dependent mech. 
*/ DependencyTable ion_write_dependency; std::vector memb_funcs; /** * Net send / Net receive * only used in CoreNEURON for book keeping synapse mechs, should go into CoreNEURON class */ std::vector> net_buf_receive; std::vector net_buf_send_type; /** * before-after-blocks from nmodl are registered here as function pointers */ std::array bamech; /** * Internal lookup tables. Number of float and int variables in each mechanism and memory layout * future --> mech class */ std::vector nrn_prop_param_size; std::vector nrn_prop_dparam_size; std::vector nrn_mech_data_layout; /* 1 AoS (default), 0 SoA */ /* array is parallel to memb_func. All are 0 except 1 for ARTIFICIAL_CELL */ std::vector nrn_artcell_qindex; std::vector nrn_is_artificial; /** * Net Receive function pointer lookup tables */ std::vector pnt_receive; /* for synaptic events. */ std::vector pnt_receive_init; std::vector pnt_receive_size; /** * Holds function pointers for WATCH callback */ std::vector nrn_watch_check; /** * values are type numbers of mechanisms which do net_send call * related to NMODL net_event() * */ std::vector nrn_has_net_event; /** * inverse of nrn_has_net_event_ maps the values of nrn_has_net_event_ to the index of * ptntype2presyn */ std::vector pnttype2presyn; std::vector nrn_bbcore_read; std::vector nrn_bbcore_write; public: auto& get_memb_funcs() { return memb_funcs; } auto& get_memb_func(size_t idx) { return memb_funcs[idx]; } auto& get_different_mechanism_type() { return different_mechanism_type; } auto& get_pnt_map() { return pnt_map; } auto& get_ion_write_dependency() { return ion_write_dependency; } auto& get_net_buf_receive() { return net_buf_receive; } auto& get_net_buf_send_type() { return net_buf_send_type; } auto& get_bamech() { return bamech; } auto& get_prop_param_size() { return nrn_prop_param_size; } auto& get_prop_dparam_size() { return nrn_prop_dparam_size; } auto& get_mech_data_layout() { return nrn_mech_data_layout; } auto& get_is_artificial() { return 
nrn_is_artificial; } auto& get_artcell_qindex() { return nrn_artcell_qindex; } auto& get_pnt_receive() { return pnt_receive; } auto& get_pnt_receive_init() { return pnt_receive_init; } auto& get_pnt_receive_size() { return pnt_receive_size; } auto& get_watch_check() { return nrn_watch_check; } auto& get_has_net_event() { return nrn_has_net_event; } auto& get_pnttype2presyn() { return pnttype2presyn; } auto& get_bbcore_read() { return nrn_bbcore_read; } auto& get_bbcore_write() { return nrn_bbcore_write; } }; extern CoreNeuron corenrn; } // namespace coreneuron ================================================ FILE: coreneuron/engine.h.in ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once // Use MAJOR.MINOR for public version #define CORENEURON_VERSION @CORENEURON_VERSION_COMBINED@ #ifdef __cplusplus extern "C" { #endif /// All-in-one initialization of mechanisms and solver extern int solve_core(int argc, char** argv); /// Initialize mechanisms extern void mk_mech_init(int argc, char** argv); /// Run core solver extern int run_solve_core(int argc, char** argv); #ifdef __cplusplus } #endif ================================================ FILE: coreneuron/gpu/nrn_acc_manager.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/utils/vrecitem.h" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/permute/data_layout.hpp" #include "coreneuron/sim/scopmath/newton_struct.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/mpi/nrnmpidec.h" #include "coreneuron/utils/utils.hpp" #ifdef CRAYPAT #include #endif #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP) #include #endif #if __has_include() #define USE_CXXABI #include #include #include #endif #ifdef CORENEURON_ENABLE_PRESENT_TABLE #include #include #include #include #include namespace { struct present_table_value { std::size_t ref_count{}, size{}; std::byte* dev_ptr{}; }; std::map present_table; std::shared_mutex present_table_mutex; } // namespace #endif namespace { /** @brief Try to demangle a type name, return the mangled name on failure. */ std::string cxx_demangle(const char* mangled) { #ifdef USE_CXXABI int status{}; // Note that the third argument to abi::__cxa_demangle returns the length of // the allocated buffer, which may be larger than strlen(demangled) + 1. std::unique_ptr demangled{ abi::__cxa_demangle(mangled, nullptr, nullptr, &status), free}; return status ? 
mangled : demangled.get(); #else return mangled; #endif } bool cnrn_target_debug_output_enabled() { const char* env = std::getenv("CORENEURON_GPU_DEBUG"); if (!env) { return false; } std::string env_s{env}; if (env_s == "1") { return true; } else if (env_s == "0") { return false; } else { throw std::runtime_error("CORENEURON_GPU_DEBUG must be set to 0 or 1 (got " + env_s + ")"); } } bool cnrn_target_enable_debug{cnrn_target_debug_output_enabled()}; } // namespace namespace coreneuron { extern InterleaveInfo* interleave_info; void nrn_ion_global_map_copyto_device(); void nrn_ion_global_map_delete_from_device(); void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay); void nrn_VecPlay_delete_from_device(NrnThread* nt); void cnrn_target_copyin_debug(std::string_view file, int line, std::size_t sizeof_T, std::type_info const& typeid_T, void const* h_ptr, std::size_t len, void* d_ptr) { if (!cnrn_target_enable_debug) { return; } std::cerr << file << ':' << line << ": cnrn_target_copyin<" << cxx_demangle(typeid_T.name()) << ">(" << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T << ") -> " << d_ptr << std::endl; } void cnrn_target_delete_debug(std::string_view file, int line, std::size_t sizeof_T, std::type_info const& typeid_T, void const* h_ptr, std::size_t len) { if (!cnrn_target_enable_debug) { return; } std::cerr << file << ':' << line << ": cnrn_target_delete<" << cxx_demangle(typeid_T.name()) << ">(" << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T << ')' << std::endl; } void cnrn_target_deviceptr_debug(std::string_view file, int line, std::type_info const& typeid_T, void const* h_ptr, void* d_ptr) { if (!cnrn_target_enable_debug) { return; } std::cerr << file << ':' << line << ": cnrn_target_deviceptr<" << cxx_demangle(typeid_T.name()) << ">(" << h_ptr << ") -> " << d_ptr << std::endl; } void cnrn_target_is_present_debug(std::string_view file, int line, std::type_info const& typeid_T, void const* h_ptr, void* 
d_ptr) { if (!cnrn_target_enable_debug) { return; } std::cerr << file << ':' << line << ": cnrn_target_is_present<" << cxx_demangle(typeid_T.name()) << ">(" << h_ptr << ") -> " << d_ptr << std::endl; } void cnrn_target_memcpy_to_device_debug(std::string_view file, int line, std::size_t sizeof_T, std::type_info const& typeid_T, void const* h_ptr, std::size_t len, void* d_ptr) { if (!cnrn_target_enable_debug) { return; } std::cerr << file << ':' << line << ": cnrn_target_memcpy_to_device<" << cxx_demangle(typeid_T.name()) << ">(" << d_ptr << ", " << h_ptr << ", " << len << " * " << sizeof_T << " = " << len * sizeof_T << ')' << std::endl; } #ifdef CORENEURON_ENABLE_PRESENT_TABLE std::pair cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr) { if (!h_ptr) { return {nullptr, false}; } // Concurrent calls to this method are safe, but they must be serialised // w.r.t. calls to the cnrn_target_*_update_present_table methods. std::shared_lock _{present_table_mutex}; if (present_table.empty()) { return {nullptr, must_be_present_or_null}; } // prev(first iterator greater than h_ptr or last if not found) gives the first iterator less // than or equal to h_ptr auto const iter = std::prev(std::upper_bound( present_table.begin(), present_table.end(), h_ptr, [](void const* hp, auto const& entry) { return hp < entry.first; })); if (iter == present_table.end()) { return {nullptr, must_be_present_or_null}; } std::byte const* const h_byte_ptr{static_cast(h_ptr)}; std::byte const* const h_start_of_block{iter->first}; std::size_t const block_size{iter->second.size}; std::byte* const d_start_of_block{iter->second.dev_ptr}; bool const is_present{h_byte_ptr < h_start_of_block + block_size}; if (!is_present) { return {nullptr, must_be_present_or_null}; } return {d_start_of_block + (h_byte_ptr - h_start_of_block), false}; } void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len) { if (!h_ptr) { assert(!d_ptr); return; } 
std::lock_guard _{present_table_mutex}; // TODO include more pedantic overlap checking? present_table_value new_val{}; new_val.size = len; new_val.ref_count = 1; new_val.dev_ptr = static_cast(d_ptr); auto const [iter, inserted] = present_table.emplace(static_cast(h_ptr), std::move(new_val)); if (!inserted) { // Insertion didn't occur because h_ptr was already in the present table assert(iter->second.size == len); assert(iter->second.dev_ptr == new_val.dev_ptr); ++(iter->second.ref_count); } } void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len) { if (!h_ptr) { return; } std::lock_guard _{present_table_mutex}; auto const iter = present_table.find(static_cast(h_ptr)); assert(iter != present_table.end()); assert(iter->second.size == len); --(iter->second.ref_count); if (iter->second.ref_count == 0) { present_table.erase(iter); } } #endif int cnrn_target_get_num_devices() { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) // choose nvidia GPU by default acc_device_t device_type = acc_device_nvidia; // check how many gpu devices available per node return acc_get_num_devices(device_type); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) return omp_get_num_devices(); #else throw std::runtime_error( "cnrn_target_get_num_devices() not implemented without OpenACC/OpenMP and gpu build"); #endif } void cnrn_target_set_default_device(int device_num) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) acc_set_device_num(device_num, acc_device_nvidia); #elif defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) omp_set_default_device(device_num); // It seems that with NVHPC 21.9 then only setting the default OpenMP device // is not enough: there were errors on some nodes when not-the-0th GPU was // used. 
These seemed to be related to the NMODL instance structs, which are // allocated using cudaMallocManaged. auto const cuda_code = cudaSetDevice(device_num); assert(cuda_code == cudaSuccess); #else throw std::runtime_error( "cnrn_target_set_default_device() not implemented without OpenACC/OpenMP and gpu build"); #endif } #ifdef CORENEURON_ENABLE_GPU #ifndef CORENEURON_UNIFIED_MEMORY static Memb_list* copy_ml_to_device(const Memb_list* ml, int type) { // As we never run code for artificial cell inside GPU we don't copy it. int is_art = corenrn.get_is_artificial()[type]; if (is_art) { return nullptr; } auto d_ml = cnrn_target_copyin(ml); if (ml->global_variables) { assert(ml->global_variables_size); void* d_inst = cnrn_target_copyin(static_cast(ml->global_variables), ml->global_variables_size); cnrn_target_memcpy_to_device(&(d_ml->global_variables), &d_inst); } int n = ml->nodecount; int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; double* dptr = cnrn_target_deviceptr(ml->data); cnrn_target_memcpy_to_device(&(d_ml->data), &(dptr)); int* d_nodeindices = cnrn_target_copyin(ml->nodeindices, n); cnrn_target_memcpy_to_device(&(d_ml->nodeindices), &d_nodeindices); if (szdp) { int pcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szdp; int* d_pdata = cnrn_target_copyin(ml->pdata, pcnt); cnrn_target_memcpy_to_device(&(d_ml->pdata), &d_pdata); } int ts = corenrn.get_memb_funcs()[type].thread_size_; if (ts) { ThreadDatum* td = cnrn_target_copyin(ml->_thread, ts); cnrn_target_memcpy_to_device(&(d_ml->_thread), &td); } // net_receive buffer associated with mechanism NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; // if net receive buffer exist for mechanism if (nrb) { NetReceiveBuffer_t* d_nrb = cnrn_target_copyin(nrb); cnrn_target_memcpy_to_device(&(d_ml->_net_receive_buffer), &d_nrb); int* d_pnt_index = cnrn_target_copyin(nrb->_pnt_index, nrb->_size); cnrn_target_memcpy_to_device(&(d_nrb->_pnt_index), &d_pnt_index); int* d_weight_index 
= cnrn_target_copyin(nrb->_weight_index, nrb->_size); cnrn_target_memcpy_to_device(&(d_nrb->_weight_index), &d_weight_index); double* d_nrb_t = cnrn_target_copyin(nrb->_nrb_t, nrb->_size); cnrn_target_memcpy_to_device(&(d_nrb->_nrb_t), &d_nrb_t); double* d_nrb_flag = cnrn_target_copyin(nrb->_nrb_flag, nrb->_size); cnrn_target_memcpy_to_device(&(d_nrb->_nrb_flag), &d_nrb_flag); int* d_displ = cnrn_target_copyin(nrb->_displ, nrb->_size + 1); cnrn_target_memcpy_to_device(&(d_nrb->_displ), &d_displ); int* d_nrb_index = cnrn_target_copyin(nrb->_nrb_index, nrb->_size); cnrn_target_memcpy_to_device(&(d_nrb->_nrb_index), &d_nrb_index); } /* copy NetSendBuffer_t on to GPU */ NetSendBuffer_t* nsb = ml->_net_send_buffer; if (nsb) { NetSendBuffer_t* d_nsb; int* d_iptr; double* d_dptr; d_nsb = cnrn_target_copyin(nsb); cnrn_target_memcpy_to_device(&(d_ml->_net_send_buffer), &d_nsb); d_iptr = cnrn_target_copyin(nsb->_sendtype, nsb->_size); cnrn_target_memcpy_to_device(&(d_nsb->_sendtype), &d_iptr); d_iptr = cnrn_target_copyin(nsb->_vdata_index, nsb->_size); cnrn_target_memcpy_to_device(&(d_nsb->_vdata_index), &d_iptr); d_iptr = cnrn_target_copyin(nsb->_pnt_index, nsb->_size); cnrn_target_memcpy_to_device(&(d_nsb->_pnt_index), &d_iptr); d_iptr = cnrn_target_copyin(nsb->_weight_index, nsb->_size); cnrn_target_memcpy_to_device(&(d_nsb->_weight_index), &d_iptr); d_dptr = cnrn_target_copyin(nsb->_nsb_t, nsb->_size); cnrn_target_memcpy_to_device(&(d_nsb->_nsb_t), &d_dptr); d_dptr = cnrn_target_copyin(nsb->_nsb_flag, nsb->_size); cnrn_target_memcpy_to_device(&(d_nsb->_nsb_flag), &d_dptr); } return d_ml; } #endif static void update_ml_on_host(const Memb_list* ml, int type) { int is_art = corenrn.get_is_artificial()[type]; if (is_art) { // Artificial mechanisms such as PatternStim and IntervalFire // are not copied onto the GPU. They should not, therefore, be // updated from the GPU. 
return; } int n = ml->nodecount; int szp = corenrn.get_prop_param_size()[type]; int szdp = corenrn.get_prop_dparam_size()[type]; int pcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szp; nrn_pragma_acc(update self(ml->data[:pcnt], ml->nodeindices[:n])) nrn_pragma_omp(target update from(ml->data[:pcnt], ml->nodeindices[:n])) int dpcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szdp; nrn_pragma_acc(update self(ml->pdata[:dpcnt]) if (szdp)) nrn_pragma_omp(target update from(ml->pdata[:dpcnt]) if (szdp)) auto nrb = ml->_net_receive_buffer; // clang-format off nrn_pragma_acc(update self(nrb->_cnt, nrb->_size, nrb->_pnt_offset, nrb->_displ_cnt, nrb->_pnt_index[:nrb->_size], nrb->_weight_index[:nrb->_size], nrb->_displ[:nrb->_size + 1], nrb->_nrb_index[:nrb->_size]) if (nrb != nullptr)) nrn_pragma_omp(target update from(nrb->_cnt, nrb->_size, nrb->_pnt_offset, nrb->_displ_cnt, nrb->_pnt_index[:nrb->_size], nrb->_weight_index[:nrb->_size], nrb->_displ[:nrb->_size + 1], nrb->_nrb_index[:nrb->_size]) if (nrb != nullptr)) // clang-format on } static void delete_ml_from_device(Memb_list* ml, int type) { int is_art = corenrn.get_is_artificial()[type]; if (is_art) { return; } // Cleanup the net send buffer if it exists { NetSendBuffer_t* nsb{ml->_net_send_buffer}; if (nsb) { cnrn_target_delete(nsb->_nsb_flag, nsb->_size); cnrn_target_delete(nsb->_nsb_t, nsb->_size); cnrn_target_delete(nsb->_weight_index, nsb->_size); cnrn_target_delete(nsb->_pnt_index, nsb->_size); cnrn_target_delete(nsb->_vdata_index, nsb->_size); cnrn_target_delete(nsb->_sendtype, nsb->_size); cnrn_target_delete(nsb); } } // Cleanup the net receive buffer if it exists. 
{ NetReceiveBuffer_t* nrb{ml->_net_receive_buffer}; if (nrb) { cnrn_target_delete(nrb->_nrb_index, nrb->_size); cnrn_target_delete(nrb->_displ, nrb->_size + 1); cnrn_target_delete(nrb->_nrb_flag, nrb->_size); cnrn_target_delete(nrb->_nrb_t, nrb->_size); cnrn_target_delete(nrb->_weight_index, nrb->_size); cnrn_target_delete(nrb->_pnt_index, nrb->_size); cnrn_target_delete(nrb); } } int n = ml->nodecount; int szdp = corenrn.get_prop_dparam_size()[type]; int ts = corenrn.get_memb_funcs()[type].thread_size_; if (ts) { cnrn_target_delete(ml->_thread, ts); } if (szdp) { int pcnt = nrn_soa_padded_size(n, SOA_LAYOUT) * szdp; cnrn_target_delete(ml->pdata, pcnt); } cnrn_target_delete(ml->nodeindices, n); if (ml->global_variables) { assert(ml->global_variables_size); cnrn_target_delete(static_cast(ml->global_variables), ml->global_variables_size); } cnrn_target_delete(ml); } #endif /* note: threads here are corresponding to global nrn_threads array */ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) { #ifdef CORENEURON_ENABLE_GPU // initialize NrnThreads for gpu execution // empty thread or only artificial cells should be on cpu for (int i = 0; i < nthreads; i++) { NrnThread* nt = threads + i; nt->compute_gpu = (nt->end > 0) ? 1 : 0; nt->_dt = dt; } nrn_ion_global_map_copyto_device(); #ifdef CORENEURON_UNIFIED_MEMORY for (int i = 0; i < nthreads; i++) { NrnThread* nt = threads + i; // NrnThread on host if (nt->n_presyn) { PreSyn* d_presyns = cnrn_target_copyin(nt->presyns, nt->n_presyn); } if (nt->n_vecplay) { /* copy VecPlayContinuous instances */ /** just empty containers */ void** d_vecplay = cnrn_target_copyin(nt->_vecplay, nt->n_vecplay); // note: we are using unified memory for NrnThread. Once VecPlay is copied to gpu, // we dont want to update nt->vecplay because it will also set gpu pointer of vecplay // inside nt on cpu (due to unified memory). 
nrn_VecPlay_copyto_device(nt, d_vecplay); } if (!nt->_permute && nt->end > 0) { printf("\n WARNING: NrnThread %d not permuted, error for linear algebra?", i); } } #else /* -- copy NrnThread to device. this needs to be contigious vector because offset is used to * find * corresponding NrnThread using Point_process in NET_RECEIVE block */ NrnThread* d_threads = cnrn_target_copyin(threads, nthreads); if (interleave_info == nullptr) { printf("\n Warning: No permutation data? Required for linear algebra!"); } /* pointers for data struct on device, starting with d_ */ for (int i = 0; i < nthreads; i++) { NrnThread* nt = threads + i; // NrnThread on host NrnThread* d_nt = d_threads + i; // NrnThread on device if (!nt->compute_gpu) { continue; } double* d__data; // nrn_threads->_data on device /* -- copy _data to device -- */ /*copy all double data for thread */ d__data = cnrn_target_copyin(nt->_data, nt->_ndata); /* Here is the example of using OpenACC data enter/exit * Remember that we are not allowed to use nt->_data but we have to use: * double *dtmp = nt->_data; // now use dtmp! 
#pragma acc enter data copyin(dtmp[0:nt->_ndata]) async(nt->stream_id) #pragma acc wait(nt->stream_id) */ /*update d_nt._data to point to device copy */ cnrn_target_memcpy_to_device(&(d_nt->_data), &d__data); /* -- setup rhs, d, a, b, v, node_aread to point to device copy -- */ double* dptr; /* for padding, we have to recompute ne */ int ne = nrn_soa_padded_size(nt->end, 0); dptr = d__data + 0 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_rhs), &(dptr)); dptr = d__data + 1 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_d), &(dptr)); dptr = d__data + 2 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_a), &(dptr)); dptr = d__data + 3 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_b), &(dptr)); dptr = d__data + 4 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_v), &(dptr)); dptr = d__data + 5 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_area), &(dptr)); if (nt->_actual_diam) { dptr = d__data + 6 * ne; cnrn_target_memcpy_to_device(&(d_nt->_actual_diam), &(dptr)); } int* d_v_parent_index = cnrn_target_copyin(nt->_v_parent_index, nt->end); cnrn_target_memcpy_to_device(&(d_nt->_v_parent_index), &(d_v_parent_index)); /* nt._ml_list is used in NET_RECEIVE block and should have valid membrane list id*/ Memb_list** d_ml_list = cnrn_target_copyin(nt->_ml_list, corenrn.get_memb_funcs().size()); cnrn_target_memcpy_to_device(&(d_nt->_ml_list), &(d_ml_list)); /* -- copy NrnThreadMembList list ml to device -- */ NrnThreadMembList* d_last_tml; bool first_tml = true; for (auto tml = nt->tml; tml; tml = tml->next) { /*copy tml to device*/ /*QUESTIONS: does tml will point to nullptr as in host ? : I assume so!*/ auto d_tml = cnrn_target_copyin(tml); /*first tml is pointed by nt */ if (first_tml) { cnrn_target_memcpy_to_device(&(d_nt->tml), &d_tml); first_tml = false; } else { /*rest of tml forms linked list */ cnrn_target_memcpy_to_device(&(d_last_tml->next), &d_tml); } // book keeping for linked-list d_last_tml = d_tml; /* now for every tml, there is a ml. 
copy that and setup pointer */ Memb_list* d_ml = copy_ml_to_device(tml->ml, tml->index); cnrn_target_memcpy_to_device(&(d_tml->ml), &d_ml); /* setup nt._ml_list */ cnrn_target_memcpy_to_device(&(d_ml_list[tml->index]), &d_ml); } if (nt->shadow_rhs_cnt) { double* d_shadow_ptr; int pcnt = nrn_soa_padded_size(nt->shadow_rhs_cnt, 0); /* copy shadow_rhs to device and fix-up the pointer */ d_shadow_ptr = cnrn_target_copyin(nt->_shadow_rhs, pcnt); cnrn_target_memcpy_to_device(&(d_nt->_shadow_rhs), &d_shadow_ptr); /* copy shadow_d to device and fix-up the pointer */ d_shadow_ptr = cnrn_target_copyin(nt->_shadow_d, pcnt); cnrn_target_memcpy_to_device(&(d_nt->_shadow_d), &d_shadow_ptr); } /* Fast membrane current calculation struct */ if (nt->nrn_fast_imem) { NrnFastImem* d_fast_imem = cnrn_target_copyin(nt->nrn_fast_imem); cnrn_target_memcpy_to_device(&(d_nt->nrn_fast_imem), &d_fast_imem); { double* d_ptr = cnrn_target_copyin(nt->nrn_fast_imem->nrn_sav_rhs, nt->end); cnrn_target_memcpy_to_device(&(d_fast_imem->nrn_sav_rhs), &d_ptr); } { double* d_ptr = cnrn_target_copyin(nt->nrn_fast_imem->nrn_sav_d, nt->end); cnrn_target_memcpy_to_device(&(d_fast_imem->nrn_sav_d), &d_ptr); } } if (nt->n_pntproc) { /* copy Point_processes array and fix the pointer to execute net_receive blocks on GPU */ Point_process* pntptr = cnrn_target_copyin(nt->pntprocs, nt->n_pntproc); cnrn_target_memcpy_to_device(&(d_nt->pntprocs), &pntptr); } if (nt->n_weight) { /* copy weight vector used in NET_RECEIVE which is pointed by netcon.weight */ double* d_weights = cnrn_target_copyin(nt->weights, nt->n_weight); cnrn_target_memcpy_to_device(&(d_nt->weights), &d_weights); } if (nt->_nvdata) { /* copy vdata which is setup in bbcore_read. 
This contains cuda allocated * nrnran123_State * */ void** d_vdata = cnrn_target_copyin(nt->_vdata, nt->_nvdata); cnrn_target_memcpy_to_device(&(d_nt->_vdata), &d_vdata); } if (nt->n_presyn) { /* copy presyn vector used for spike exchange, note we have added new PreSynHelper due * to issue * while updating PreSyn objects which has virtual base class. May be this is issue due * to * VTable and alignment */ PreSynHelper* d_presyns_helper = cnrn_target_copyin(nt->presyns_helper, nt->n_presyn); cnrn_target_memcpy_to_device(&(d_nt->presyns_helper), &d_presyns_helper); PreSyn* d_presyns = cnrn_target_copyin(nt->presyns, nt->n_presyn); cnrn_target_memcpy_to_device(&(d_nt->presyns), &d_presyns); } if (nt->_net_send_buffer_size) { /* copy send_receive buffer */ int* d_net_send_buffer = cnrn_target_copyin(nt->_net_send_buffer, nt->_net_send_buffer_size); cnrn_target_memcpy_to_device(&(d_nt->_net_send_buffer), &d_net_send_buffer); } if (nt->n_vecplay) { /* copy VecPlayContinuous instances */ /** just empty containers */ void** d_vecplay = cnrn_target_copyin(nt->_vecplay, nt->n_vecplay); cnrn_target_memcpy_to_device(&(d_nt->_vecplay), &d_vecplay); nrn_VecPlay_copyto_device(nt, d_vecplay); } if (nt->_permute) { if (interleave_permute_type == 1) { /* todo: not necessary to setup pointers, just copy it */ InterleaveInfo* info = interleave_info + i; int* d_ptr = nullptr; InterleaveInfo* d_info = cnrn_target_copyin(info); d_ptr = cnrn_target_copyin(info->stride, info->nstride + 1); cnrn_target_memcpy_to_device(&(d_info->stride), &d_ptr); d_ptr = cnrn_target_copyin(info->firstnode, nt->ncell); cnrn_target_memcpy_to_device(&(d_info->firstnode), &d_ptr); d_ptr = cnrn_target_copyin(info->lastnode, nt->ncell); cnrn_target_memcpy_to_device(&(d_info->lastnode), &d_ptr); d_ptr = cnrn_target_copyin(info->cellsize, nt->ncell); cnrn_target_memcpy_to_device(&(d_info->cellsize), &d_ptr); } else if (interleave_permute_type == 2) { /* todo: not necessary to setup pointers, just copy it */ 
InterleaveInfo* info = interleave_info + i; InterleaveInfo* d_info = cnrn_target_copyin(info); int* d_ptr = nullptr; d_ptr = cnrn_target_copyin(info->stride, info->nstride); cnrn_target_memcpy_to_device(&(d_info->stride), &d_ptr); d_ptr = cnrn_target_copyin(info->firstnode, info->nwarp + 1); cnrn_target_memcpy_to_device(&(d_info->firstnode), &d_ptr); d_ptr = cnrn_target_copyin(info->lastnode, info->nwarp + 1); cnrn_target_memcpy_to_device(&(d_info->lastnode), &d_ptr); d_ptr = cnrn_target_copyin(info->stridedispl, info->nwarp + 1); cnrn_target_memcpy_to_device(&(d_info->stridedispl), &d_ptr); d_ptr = cnrn_target_copyin(info->cellsize, info->nwarp); cnrn_target_memcpy_to_device(&(d_info->cellsize), &d_ptr); } else { printf("\n ERROR: only --cell_permute = [12] implemented"); abort(); } } else { printf("\n WARNING: NrnThread %d not permuted, error for linear algebra?", i); } { TrajectoryRequests* tr = nt->trajec_requests; if (tr) { // Create a device-side copy of the `trajec_requests` struct and // make sure the device-side NrnThread object knows about it. TrajectoryRequests* d_trajec_requests = cnrn_target_copyin(tr); cnrn_target_memcpy_to_device(&(d_nt->trajec_requests), &d_trajec_requests); // Initialise the double** gather member of the struct. double** d_tr_gather = cnrn_target_copyin(tr->gather, tr->n_trajec); cnrn_target_memcpy_to_device(&(d_trajec_requests->gather), &d_tr_gather); // Initialise the double** varrays member of the struct if it's // set. double** d_tr_varrays{nullptr}; if (tr->varrays) { d_tr_varrays = cnrn_target_copyin(tr->varrays, tr->n_trajec); cnrn_target_memcpy_to_device(&(d_trajec_requests->varrays), &d_tr_varrays); } for (int i = 0; i < tr->n_trajec; ++i) { if (tr->varrays) { // tr->varrays[i] is a buffer of tr->bsize doubles on the host, // make a device-side copy of it and store a pointer to it in // the device-side version of tr->varrays. 
double* d_buf_traj_i = cnrn_target_copyin(tr->varrays[i], tr->bsize); cnrn_target_memcpy_to_device(&(d_tr_varrays[i]), &d_buf_traj_i); } // tr->gather[i] is a double* referring to (host) data in the // (host) _data block auto* d_gather_i = cnrn_target_deviceptr(tr->gather[i]); cnrn_target_memcpy_to_device(&(d_tr_gather[i]), &d_gather_i); } // TODO: other `double** scatter` and `void** vpr` members of // the TrajectoryRequests struct are not copied to the device. // The `int vsize` member is updated during the simulation but // not kept up to date timestep-by-timestep on the device. } } { auto* d_fornetcon_perm_indices = cnrn_target_copyin(nt->_fornetcon_perm_indices, nt->_fornetcon_perm_indices_size); cnrn_target_memcpy_to_device(&(d_nt->_fornetcon_perm_indices), &d_fornetcon_perm_indices); } { auto* d_fornetcon_weight_perm = cnrn_target_copyin(nt->_fornetcon_weight_perm, nt->_fornetcon_weight_perm_size); cnrn_target_memcpy_to_device(&(d_nt->_fornetcon_weight_perm), &d_fornetcon_weight_perm); } } #endif #else (void) threads; (void) nthreads; #endif } void copy_ivoc_vect_to_device(const IvocVect& from, IvocVect& to) { #ifdef CORENEURON_ENABLE_GPU /// by default `to` is desitionation pointer on a device IvocVect* d_iv = &to; size_t n = from.size(); if (n) { double* d_data = cnrn_target_copyin(from.data(), n); cnrn_target_memcpy_to_device(&(d_iv->data_), &d_data); } #else (void) from; (void) to; #endif } void delete_ivoc_vect_from_device(IvocVect& vec) { #ifdef CORENEURON_ENABLE_GPU auto const n = vec.size(); if (n) { cnrn_target_delete(vec.data(), n); } #else static_cast(vec); #endif } void realloc_net_receive_buffer(NrnThread* nt, Memb_list* ml) { NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; if (!nrb) { return; } #ifdef CORENEURON_ENABLE_GPU if (nt->compute_gpu) { // free existing vectors in buffers on gpu cnrn_target_delete(nrb->_pnt_index, nrb->_size); cnrn_target_delete(nrb->_weight_index, nrb->_size); cnrn_target_delete(nrb->_nrb_t, nrb->_size); 
cnrn_target_delete(nrb->_nrb_flag, nrb->_size); cnrn_target_delete(nrb->_displ, nrb->_size + 1); cnrn_target_delete(nrb->_nrb_index, nrb->_size); } #endif // Reallocate host buffers using ecalloc_align (as in phase2.cpp) and // free_memory (as in nrn_setup.cpp) auto const realloc = [old_size = nrb->_size, nrb](auto*& ptr, std::size_t extra_size = 0) { using T = std::remove_pointer_t>; static_assert(std::is_trivial::value, "Only trivially constructible and copiable types are supported."); static_assert(std::is_same::value, "ptr should be reference-to-pointer"); auto* const new_data = static_cast(ecalloc_align((nrb->_size + extra_size), sizeof(T))); std::memcpy(new_data, ptr, (old_size + extra_size) * sizeof(T)); free_memory(ptr); ptr = new_data; }; nrb->_size *= 2; realloc(nrb->_pnt_index); realloc(nrb->_weight_index); realloc(nrb->_nrb_t); realloc(nrb->_nrb_flag); realloc(nrb->_displ, 1); realloc(nrb->_nrb_index); #ifdef CORENEURON_ENABLE_GPU if (nt->compute_gpu) { // update device copy nrn_pragma_acc(update device(nrb)); nrn_pragma_omp(target update to(nrb)); NetReceiveBuffer_t* const d_nrb{cnrn_target_deviceptr(nrb)}; // recopy the vectors in the buffer int* const d_pnt_index{cnrn_target_copyin(nrb->_pnt_index, nrb->_size)}; cnrn_target_memcpy_to_device(&(d_nrb->_pnt_index), &d_pnt_index); int* const d_weight_index{cnrn_target_copyin(nrb->_weight_index, nrb->_size)}; cnrn_target_memcpy_to_device(&(d_nrb->_weight_index), &d_weight_index); double* const d_nrb_t{cnrn_target_copyin(nrb->_nrb_t, nrb->_size)}; cnrn_target_memcpy_to_device(&(d_nrb->_nrb_t), &d_nrb_t); double* const d_nrb_flag{cnrn_target_copyin(nrb->_nrb_flag, nrb->_size)}; cnrn_target_memcpy_to_device(&(d_nrb->_nrb_flag), &d_nrb_flag); int* const d_displ{cnrn_target_copyin(nrb->_displ, nrb->_size + 1)}; cnrn_target_memcpy_to_device(&(d_nrb->_displ), &d_displ); int* const d_nrb_index{cnrn_target_copyin(nrb->_nrb_index, nrb->_size)}; cnrn_target_memcpy_to_device(&(d_nrb->_nrb_index), &d_nrb_index); } 
#endif } using NRB_P = std::pair; struct comp { bool operator()(const NRB_P& a, const NRB_P& b) { if (a.first == b.first) { return a.second > b.second; // same instances in original net_receive order } return a.first > b.first; } }; static void net_receive_buffer_order(NetReceiveBuffer_t* nrb) { Instrumentor::phase p_net_receive_buffer_order("net-receive-buf-order"); if (nrb->_cnt == 0) { nrb->_displ_cnt = 0; return; } std::priority_queue, comp> nrbq; for (int i = 0; i < nrb->_cnt; ++i) { nrbq.push(NRB_P(nrb->_pnt_index[i], i)); } int displ_cnt = 0; int index_cnt = 0; int last_instance_index = -1; nrb->_displ[0] = 0; while (!nrbq.empty()) { const NRB_P& p = nrbq.top(); nrb->_nrb_index[index_cnt++] = p.second; if (p.first != last_instance_index) { ++displ_cnt; } nrb->_displ[displ_cnt] = index_cnt; last_instance_index = p.first; nrbq.pop(); } nrb->_displ_cnt = displ_cnt; } /* when we execute NET_RECEIVE block on GPU, we provide the index of synapse instances * which we need to execute during the current timestep. In order to do this, we have * update NetReceiveBuffer_t object to GPU. When size of cpu buffer changes, we set * reallocated to true and hence need to reallocate buffer on GPU and then need to copy * entire buffer. If reallocated is 0, that means buffer size is not changed and hence * only need to copy _size elements to GPU. * Note: this is very preliminary implementation, optimisations will be done after first * functional version. */ void update_net_receive_buffer(NrnThread* nt) { Instrumentor::phase p_update_net_receive_buffer("update-net-receive-buf"); for (auto tml = nt->tml; tml; tml = tml->next) { int is_art = corenrn.get_is_artificial()[tml->index]; if (is_art) { continue; } // net_receive buffer to copy NetReceiveBuffer_t* nrb = tml->ml->_net_receive_buffer; // if net receive buffer exist for mechanism if (nrb && nrb->_cnt) { // instance order to avoid race. 
setup _displ and _nrb_index net_receive_buffer_order(nrb); if (nt->compute_gpu) { Instrumentor::phase p_net_receive_buffer_order("net-receive-buf-cpu2gpu"); // note that dont update nrb otherwise we lose pointers // clang-format off /* update scalar elements */ nrn_pragma_acc(update device(nrb->_cnt, nrb->_displ_cnt, nrb->_pnt_index[:nrb->_cnt], nrb->_weight_index[:nrb->_cnt], nrb->_nrb_t[:nrb->_cnt], nrb->_nrb_flag[:nrb->_cnt], nrb->_displ[:nrb->_displ_cnt + 1], nrb->_nrb_index[:nrb->_cnt]) async(nt->stream_id)) nrn_pragma_omp(target update to(nrb->_cnt, nrb->_displ_cnt, nrb->_pnt_index[:nrb->_cnt], nrb->_weight_index[:nrb->_cnt], nrb->_nrb_t[:nrb->_cnt], nrb->_nrb_flag[:nrb->_cnt], nrb->_displ[:nrb->_displ_cnt + 1], nrb->_nrb_index[:nrb->_cnt])) // clang-format on } } } nrn_pragma_acc(wait(nt->stream_id)) } void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb) { #ifdef CORENEURON_ENABLE_GPU if (!nt->compute_gpu) return; // check if nsb->_cnt was exceeded on GPU: as the buffer can not be increased // during gpu execution, we should just abort the execution. 
// \todo: this needs to be fixed with different memory allocation strategy if (nsb->_cnt > nsb->_size) { printf("ERROR: NetSendBuffer exceeded during GPU execution (rank %d)\n", nrnmpi_myid); nrn_abort(1); } if (nsb->_cnt) { Instrumentor::phase p_net_receive_buffer_order("net-send-buf-gpu2cpu"); } // clang-format off nrn_pragma_acc(update self(nsb->_sendtype[:nsb->_cnt], nsb->_vdata_index[:nsb->_cnt], nsb->_pnt_index[:nsb->_cnt], nsb->_weight_index[:nsb->_cnt], nsb->_nsb_t[:nsb->_cnt], nsb->_nsb_flag[:nsb->_cnt]) if (nsb->_cnt)) nrn_pragma_omp(target update from(nsb->_sendtype[:nsb->_cnt], nsb->_vdata_index[:nsb->_cnt], nsb->_pnt_index[:nsb->_cnt], nsb->_weight_index[:nsb->_cnt], nsb->_nsb_t[:nsb->_cnt], nsb->_nsb_flag[:nsb->_cnt]) if (nsb->_cnt)) // clang-format on #else (void) nt; (void) nsb; #endif } void update_nrnthreads_on_host(NrnThread* threads, int nthreads) { #ifdef CORENEURON_ENABLE_GPU for (int i = 0; i < nthreads; i++) { NrnThread* nt = threads + i; if (nt->compute_gpu && (nt->end > 0)) { /* -- copy data to host -- */ int ne = nrn_soa_padded_size(nt->end, 0); // clang-format off nrn_pragma_acc(update self(nt->_actual_rhs[:ne], nt->_actual_d[:ne], nt->_actual_a[:ne], nt->_actual_b[:ne], nt->_actual_v[:ne], nt->_actual_area[:ne])) nrn_pragma_omp(target update from(nt->_actual_rhs[:ne], nt->_actual_d[:ne], nt->_actual_a[:ne], nt->_actual_b[:ne], nt->_actual_v[:ne], nt->_actual_area[:ne])) // clang-format on nrn_pragma_acc(update self(nt->_actual_diam[:ne]) if (nt->_actual_diam != nullptr)) nrn_pragma_omp( target update from(nt->_actual_diam[:ne]) if (nt->_actual_diam != nullptr)) /* @todo: nt._ml_list[tml->index] = tml->ml; */ /* -- copy NrnThreadMembList list ml to host -- */ for (auto tml = nt->tml; tml; tml = tml->next) { if (!corenrn.get_is_artificial()[tml->index]) { nrn_pragma_acc(update self(tml->index, tml->ml->nodecount)) nrn_pragma_omp(target update from(tml->index, tml->ml->nodecount)) } update_ml_on_host(tml->ml, tml->index); } int pcnt = 
nrn_soa_padded_size(nt->shadow_rhs_cnt, 0); /* copy shadow_rhs to host */ /* copy shadow_d to host */ nrn_pragma_acc( update self(nt->_shadow_rhs[:pcnt], nt->_shadow_d[:pcnt]) if (nt->shadow_rhs_cnt)) nrn_pragma_omp(target update from( nt->_shadow_rhs[:pcnt], nt->_shadow_d[:pcnt]) if (nt->shadow_rhs_cnt)) // clang-format off nrn_pragma_acc(update self(nt->nrn_fast_imem->nrn_sav_rhs[:nt->end], nt->nrn_fast_imem->nrn_sav_d[:nt->end]) if (nt->nrn_fast_imem != nullptr)) nrn_pragma_omp(target update from(nt->nrn_fast_imem->nrn_sav_rhs[:nt->end], nt->nrn_fast_imem->nrn_sav_d[:nt->end]) if (nt->nrn_fast_imem != nullptr)) // clang-format on nrn_pragma_acc(update self(nt->pntprocs[:nt->n_pntproc]) if (nt->n_pntproc)) nrn_pragma_omp(target update from(nt->pntprocs[:nt->n_pntproc]) if (nt->n_pntproc)) nrn_pragma_acc(update self(nt->weights[:nt->n_weight]) if (nt->n_weight)) nrn_pragma_omp(target update from(nt->weights[:nt->n_weight]) if (nt->n_weight)) nrn_pragma_acc(update self( nt->presyns_helper[:nt->n_presyn], nt->presyns[:nt->n_presyn]) if (nt->n_presyn)) nrn_pragma_omp(target update from( nt->presyns_helper[:nt->n_presyn], nt->presyns[:nt->n_presyn]) if (nt->n_presyn)) { TrajectoryRequests* tr = nt->trajec_requests; if (tr && tr->varrays) { // The full buffers have `bsize` entries, but only `vsize` // of them are valid. for (int i = 0; i < tr->n_trajec; ++i) { nrn_pragma_acc(update self(tr->varrays[i][:tr->vsize])) nrn_pragma_omp(target update from(tr->varrays[i][:tr->vsize])) } } } /* dont update vdata, its pointer array nrn_pragma_acc(update self(nt->_vdata[:nt->_nvdata) if nt->_nvdata) nrn_pragma_omp(target update from(nt->_vdata[:nt->_nvdata) if (nt->_nvdata)) */ } } #else (void) threads; (void) nthreads; #endif } /** * Copy weights from GPU to CPU * * User may record NetCon weights at the end of simulation. * For this purpose update weights of all NrnThread objects * from GPU to CPU. 
*/ void update_weights_from_gpu(NrnThread* threads, int nthreads) { #ifdef CORENEURON_ENABLE_GPU for (int i = 0; i < nthreads; i++) { NrnThread* nt = threads + i; size_t n_weight = nt->n_weight; if (nt->compute_gpu && n_weight > 0) { double* weights = nt->weights; nrn_pragma_acc(update host(weights [0:n_weight])) nrn_pragma_omp(target update from(weights [0:n_weight])) } } #endif } /** Cleanup device memory that is being tracked by the OpenACC runtime. * * This function painstakingly calls `cnrn_target_delete` in reverse order on all * pointers that were passed to `cnrn_target_copyin` in `setup_nrnthreads_on_device`. * This cleanup ensures that if the GPU is initialised multiple times from the * same process then the OpenACC runtime will not be polluted with old * pointers, which can cause errors. In particular if we do: * @code * { * // ... some_ptr is dynamically allocated ... * cnrn_target_copyin(some_ptr, some_size); * // ... do some work ... * // cnrn_target_delete(some_ptr); * free(some_ptr); * } * { * // ... same_ptr_again is dynamically allocated at the same address ... * cnrn_target_copyin(same_ptr_again, some_other_size); // ERROR * } * @endcode * the application will/may abort with an error such as: * FATAL ERROR: variable in data clause is partially present on the device. * The pattern above is typical of calling CoreNEURON on GPU multiple times in * the same process. 
*/ void delete_nrnthreads_on_device(NrnThread* threads, int nthreads) { #ifdef CORENEURON_ENABLE_GPU for (int i = 0; i < nthreads; i++) { NrnThread* nt = threads + i; if (!nt->compute_gpu) { continue; } cnrn_target_delete(nt->_fornetcon_weight_perm, nt->_fornetcon_weight_perm_size); cnrn_target_delete(nt->_fornetcon_perm_indices, nt->_fornetcon_perm_indices_size); { TrajectoryRequests* tr = nt->trajec_requests; if (tr) { if (tr->varrays) { for (int i = 0; i < tr->n_trajec; ++i) { cnrn_target_delete(tr->varrays[i], tr->bsize); } cnrn_target_delete(tr->varrays, tr->n_trajec); } cnrn_target_delete(tr->gather, tr->n_trajec); cnrn_target_delete(tr); } } if (nt->_permute) { if (interleave_permute_type == 1) { InterleaveInfo* info = interleave_info + i; cnrn_target_delete(info->cellsize, nt->ncell); cnrn_target_delete(info->lastnode, nt->ncell); cnrn_target_delete(info->firstnode, nt->ncell); cnrn_target_delete(info->stride, info->nstride + 1); cnrn_target_delete(info); } else if (interleave_permute_type == 2) { InterleaveInfo* info = interleave_info + i; cnrn_target_delete(info->cellsize, info->nwarp); cnrn_target_delete(info->stridedispl, info->nwarp + 1); cnrn_target_delete(info->lastnode, info->nwarp + 1); cnrn_target_delete(info->firstnode, info->nwarp + 1); cnrn_target_delete(info->stride, info->nstride); cnrn_target_delete(info); } } if (nt->n_vecplay) { nrn_VecPlay_delete_from_device(nt); cnrn_target_delete(nt->_vecplay, nt->n_vecplay); } // Cleanup send_receive buffer. if (nt->_net_send_buffer_size) { cnrn_target_delete(nt->_net_send_buffer, nt->_net_send_buffer_size); } if (nt->n_presyn) { cnrn_target_delete(nt->presyns, nt->n_presyn); cnrn_target_delete(nt->presyns_helper, nt->n_presyn); } // Cleanup data that's setup in bbcore_read. 
if (nt->_nvdata) { cnrn_target_delete(nt->_vdata, nt->_nvdata); } // Cleanup weight vector used in NET_RECEIVE if (nt->n_weight) { cnrn_target_delete(nt->weights, nt->n_weight); } // Cleanup point processes if (nt->n_pntproc) { cnrn_target_delete(nt->pntprocs, nt->n_pntproc); } if (nt->nrn_fast_imem) { cnrn_target_delete(nt->nrn_fast_imem->nrn_sav_d, nt->end); cnrn_target_delete(nt->nrn_fast_imem->nrn_sav_rhs, nt->end); cnrn_target_delete(nt->nrn_fast_imem); } if (nt->shadow_rhs_cnt) { int pcnt = nrn_soa_padded_size(nt->shadow_rhs_cnt, 0); cnrn_target_delete(nt->_shadow_d, pcnt); cnrn_target_delete(nt->_shadow_rhs, pcnt); } for (auto tml = nt->tml; tml; tml = tml->next) { delete_ml_from_device(tml->ml, tml->index); cnrn_target_delete(tml); } cnrn_target_delete(nt->_ml_list, corenrn.get_memb_funcs().size()); cnrn_target_delete(nt->_v_parent_index, nt->end); cnrn_target_delete(nt->_data, nt->_ndata); } cnrn_target_delete(threads, nthreads); nrn_ion_global_map_delete_from_device(); #endif } void nrn_newtonspace_copyto_device(NewtonSpace* ns) { #ifdef CORENEURON_ENABLE_GPU // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. if (nrn_threads[0].compute_gpu == 0) { return; } int n = ns->n * ns->n_instance; // actually, the values of double do not matter, only the pointers. 
NewtonSpace* d_ns = cnrn_target_copyin(ns); double* pd; pd = cnrn_target_copyin(ns->delta_x, n); cnrn_target_memcpy_to_device(&(d_ns->delta_x), &pd); pd = cnrn_target_copyin(ns->high_value, n); cnrn_target_memcpy_to_device(&(d_ns->high_value), &pd); pd = cnrn_target_copyin(ns->low_value, n); cnrn_target_memcpy_to_device(&(d_ns->low_value), &pd); pd = cnrn_target_copyin(ns->rowmax, n); cnrn_target_memcpy_to_device(&(d_ns->rowmax), &pd); auto pint = cnrn_target_copyin(ns->perm, n); cnrn_target_memcpy_to_device(&(d_ns->perm), &pint); auto ppd = cnrn_target_copyin(ns->jacobian, ns->n); cnrn_target_memcpy_to_device(&(d_ns->jacobian), &ppd); // the actual jacobian doubles were allocated as a single array double* d_jacdat = cnrn_target_copyin(ns->jacobian[0], ns->n * n); for (int i = 0; i < ns->n; ++i) { pd = d_jacdat + i * n; cnrn_target_memcpy_to_device(&(ppd[i]), &pd); } #endif } void nrn_newtonspace_delete_from_device(NewtonSpace* ns) { #ifdef CORENEURON_ENABLE_GPU // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. if (nrn_threads[0].compute_gpu == 0) { return; } int n = ns->n * ns->n_instance; cnrn_target_delete(ns->jacobian[0], ns->n * n); cnrn_target_delete(ns->jacobian, ns->n); cnrn_target_delete(ns->perm, n); cnrn_target_delete(ns->rowmax, n); cnrn_target_delete(ns->low_value, n); cnrn_target_delete(ns->high_value, n); cnrn_target_delete(ns->delta_x, n); cnrn_target_delete(ns); #endif } void nrn_sparseobj_copyto_device(SparseObj* so) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. 
if (nrn_threads[0].compute_gpu == 0) { return; } unsigned n1 = so->neqn + 1; SparseObj* d_so = cnrn_target_copyin(so); // only pointer fields in SparseObj that need setting up are // rowst, diag, rhs, ngetcall, coef_list // only pointer fields in Elm that need setting up are // r_down, c_right, value // do not care about the Elm* ptr value, just the space. Elm** d_rowst = cnrn_target_copyin(so->rowst, n1); cnrn_target_memcpy_to_device(&(d_so->rowst), &d_rowst); Elm** d_diag = cnrn_target_copyin(so->diag, n1); cnrn_target_memcpy_to_device(&(d_so->diag), &d_diag); unsigned* pu = cnrn_target_copyin(so->ngetcall, so->_cntml_padded); cnrn_target_memcpy_to_device(&(d_so->ngetcall), &pu); double* pd = cnrn_target_copyin(so->rhs, n1 * so->_cntml_padded); cnrn_target_memcpy_to_device(&(d_so->rhs), &pd); double** d_coef_list = cnrn_target_copyin(so->coef_list, so->coef_list_size); cnrn_target_memcpy_to_device(&(d_so->coef_list), &d_coef_list); // Fill in relevant Elm pointer values for (unsigned irow = 1; irow < n1; ++irow) { for (Elm* elm = so->rowst[irow]; elm; elm = elm->c_right) { Elm* pelm = cnrn_target_copyin(elm); if (elm == so->rowst[irow]) { cnrn_target_memcpy_to_device(&(d_rowst[irow]), &pelm); } else { Elm* d_e = cnrn_target_deviceptr(elm->c_left); cnrn_target_memcpy_to_device(&(pelm->c_left), &d_e); } if (elm->col == elm->row) { cnrn_target_memcpy_to_device(&(d_diag[irow]), &pelm); } if (irow > 1) { if (elm->r_up) { Elm* d_e = cnrn_target_deviceptr(elm->r_up); cnrn_target_memcpy_to_device(&(pelm->r_up), &d_e); } } pd = cnrn_target_copyin(elm->value, so->_cntml_padded); cnrn_target_memcpy_to_device(&(pelm->value), &pd); } } // visit all the Elm again and fill in pelm->r_down and pelm->c_left for (unsigned irow = 1; irow < n1; ++irow) { for (Elm* elm = so->rowst[irow]; elm; elm = elm->c_right) { auto pelm = cnrn_target_deviceptr(elm); if (elm->r_down) { auto d_e = cnrn_target_deviceptr(elm->r_down); cnrn_target_memcpy_to_device(&(pelm->r_down), &d_e); } if 
(elm->c_right) { auto d_e = cnrn_target_deviceptr(elm->c_right); cnrn_target_memcpy_to_device(&(pelm->c_right), &d_e); } } } // Fill in the d_so->coef_list for (unsigned i = 0; i < so->coef_list_size; ++i) { pd = cnrn_target_deviceptr(so->coef_list[i]); cnrn_target_memcpy_to_device(&(d_coef_list[i]), &pd); } #endif } void nrn_sparseobj_delete_from_device(SparseObj* so) { #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) // FIXME this check needs to be tweaked if we ever want to run with a mix // of CPU and GPU threads. if (nrn_threads[0].compute_gpu == 0) { return; } unsigned n1 = so->neqn + 1; for (unsigned irow = 1; irow < n1; ++irow) { for (Elm* elm = so->rowst[irow]; elm; elm = elm->c_right) { cnrn_target_delete(elm->value, so->_cntml_padded); cnrn_target_delete(elm); } } cnrn_target_delete(so->coef_list, so->coef_list_size); cnrn_target_delete(so->rhs, n1 * so->_cntml_padded); cnrn_target_delete(so->ngetcall, so->_cntml_padded); cnrn_target_delete(so->diag, n1); cnrn_target_delete(so->rowst, n1); cnrn_target_delete(so); #endif } #ifdef CORENEURON_ENABLE_GPU void nrn_ion_global_map_copyto_device() { if (nrn_ion_global_map_size) { double** d_data = cnrn_target_copyin(nrn_ion_global_map, nrn_ion_global_map_size); for (int j = 0; j < nrn_ion_global_map_size; j++) { if (nrn_ion_global_map[j]) { double* d_mechmap = cnrn_target_copyin(nrn_ion_global_map[j], ion_global_map_member_size); cnrn_target_memcpy_to_device(&(d_data[j]), &d_mechmap); } } } } void nrn_ion_global_map_delete_from_device() { for (int j = 0; j < nrn_ion_global_map_size; j++) { if (nrn_ion_global_map[j]) { cnrn_target_delete(nrn_ion_global_map[j], ion_global_map_member_size); } } if (nrn_ion_global_map_size) { cnrn_target_delete(nrn_ion_global_map, nrn_ion_global_map_size); } } void init_gpu() { // check how many gpu devices available per node int num_devices_per_node = cnrn_target_get_num_devices(); // if no gpu found, can't run on GPU if (num_devices_per_node == 0) { 
nrn_fatal_error("\n ERROR : Enabled GPU execution but couldn't find NVIDIA GPU!\n"); } if (corenrn_param.num_gpus != 0) { if (corenrn_param.num_gpus > num_devices_per_node) { nrn_fatal_error("Fatal error: asking for '%d' GPUs per node but only '%d' available\n", corenrn_param.num_gpus, num_devices_per_node); } else { num_devices_per_node = corenrn_param.num_gpus; } } // get local rank within a node and assign specific gpu gpu for this node. // multiple threads within the node will use same device. int local_rank = 0; int local_size = 1; #if NRNMPI if (corenrn_param.mpi_enable) { local_rank = nrnmpi_local_rank(); local_size = nrnmpi_local_size(); } #endif cnrn_target_set_default_device(local_rank % num_devices_per_node); if (nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { std::cout << " Info : " << num_devices_per_node << " GPUs shared by " << local_size << " ranks per node\n"; } } void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) { for (int i = 0; i < nt->n_vecplay; i++) { VecPlayContinuous* vecplay_instance = (VecPlayContinuous*) nt->_vecplay[i]; /** just VecPlayContinuous object */ VecPlayContinuous* d_vecplay_instance = cnrn_target_copyin(vecplay_instance); cnrn_target_memcpy_to_device((VecPlayContinuous**) (&(d_vecplay[i])), &d_vecplay_instance); /** copy y_, t_ and discon_indices_ */ copy_ivoc_vect_to_device(vecplay_instance->y_, d_vecplay_instance->y_); copy_ivoc_vect_to_device(vecplay_instance->t_, d_vecplay_instance->t_); // OL211213: beware, the test suite does not currently include anything // with a non-null discon_indices_. 
if (vecplay_instance->discon_indices_) { IvocVect* d_discon_indices = cnrn_target_copyin(vecplay_instance->discon_indices_); cnrn_target_memcpy_to_device(&(d_vecplay_instance->discon_indices_), &d_discon_indices); copy_ivoc_vect_to_device(*(vecplay_instance->discon_indices_), *(d_vecplay_instance->discon_indices_)); } /** copy PlayRecordEvent : todo: verify this */ PlayRecordEvent* d_e_ = cnrn_target_copyin(vecplay_instance->e_); cnrn_target_memcpy_to_device(&(d_e_->plr_), (PlayRecord**) (&d_vecplay_instance)); cnrn_target_memcpy_to_device(&(d_vecplay_instance->e_), &d_e_); /** copy pd_ : note that it's pointer inside ml->data and hence data itself is * already on GPU */ double* d_pd_ = cnrn_target_deviceptr(vecplay_instance->pd_); cnrn_target_memcpy_to_device(&(d_vecplay_instance->pd_), &d_pd_); } } void nrn_VecPlay_delete_from_device(NrnThread* nt) { for (int i = 0; i < nt->n_vecplay; i++) { auto* vecplay_instance = static_cast(nt->_vecplay[i]); cnrn_target_delete(vecplay_instance->e_); if (vecplay_instance->discon_indices_) { delete_ivoc_vect_from_device(*(vecplay_instance->discon_indices_)); } delete_ivoc_vect_from_device(vecplay_instance->t_); delete_ivoc_vect_from_device(vecplay_instance->y_); cnrn_target_delete(vecplay_instance); } } #endif } // namespace coreneuron ================================================ FILE: coreneuron/gpu/nrn_acc_manager.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once namespace coreneuron { struct Memb_list; struct NrnThread; struct NetSendBuffer_t; void setup_nrnthreads_on_device(NrnThread* threads, int nthreads); void delete_nrnthreads_on_device(NrnThread* threads, int nthreads); void update_nrnthreads_on_host(NrnThread* threads, int nthreads); void update_net_receive_buffer(NrnThread* _nt); // Called by NModl void realloc_net_receive_buffer(NrnThread* nt, Memb_list* ml); void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb); void update_weights_from_gpu(NrnThread* threads, int nthreads); void init_gpu(); } // namespace coreneuron ================================================ FILE: coreneuron/io/core2nrn_data_return.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include #include "coreneuron/coreneuron.hpp" #include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/io/core2nrn_data_return.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/permute/node_permute.h" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/utils/vrecitem.h" #include "coreneuron/io/mem_layout_util.hpp" /** @brief, Information from NEURON to help with copying data to NEURON. * Info for copying voltage, i_membrane_, and mechanism data. * See implementaton in * nrn/src/nrniv/nrnbbcore_write.cpp:nrnthreads_type_return. * Return is size of either the returned data pointer or the number * of pointers in mdata. tid is the thread index. */ size_t (*nrn2core_type_return_)(int type, int tid, double*& data, double**& mdata); /** @brief, Call NEURON mechanism bbcore_read. 
/** @brief permuted array copied to unpermuted array
 *  If permute is NULL then just a copy
 *
 *  @param n            number of elements to copy
 *  @param permuted_src source array, read at index permute[i] for destination i
 *  @param dest         destination array of at least n elements
 *  @param permute      permutation of length n, or nullptr for the identity
 */
static void inverse_permute_copy(size_t n,
                                 const double* permuted_src,
                                 double* dest,
                                 const int* permute) {
    if (permute) {
        for (size_t i = 0; i < n; ++i) {
            dest[i] = permuted_src[permute[i]];
        }
    } else {
        std::copy(permuted_src, permuted_src + n, dest);
    }
}

/** @brief SoA permuted mechanism data copied to unpermuted AoS data.
 *  dest is an array of n pointers to the beginning of each sz length array.
 *  src is a contiguous array of sz segments of size stride. The stride
 *  may be slightly greater than n for purposes of alignment.
 *  Each of the sz segments of src are permuted.
 *  If permute is NULL the segments are read in destination order, which is
 *  the same null convention as inverse_permute_copy.
 */
static void soa2aos_inverse_permute_copy(size_t n,
                                         int sz,
                                         int stride,
                                         const double* src,
                                         double** dest,
                                         const int* permute) {
    // src is soa and (when permute is given) permuted. dest is n pointers to sz doubles (aos).
    for (size_t instance = 0; instance < n; ++instance) {
        double* d = dest[instance];
        // start of this instance inside the first segment
        const double* s = permute ? src + permute[instance] : src + instance;
        for (int i = 0; i < sz; ++i) {
            d[i] = s[i * stride];  // the i-th variable lives in the i-th segment
        }
    }
}

/** @brief SoA unpermuted mechanism data copied to unpermuted AoS data.
 *  dest is an array of n pointers to the beginning of each sz length array.
 *  src is a contiguous array of sz segments of size stride. The stride
 *  may be slightly greater than n for purposes of alignment.
 *  Each of the sz segments of src have the same order as the n pointers
 *  of dest.
 */
static void soa2aos_unpermuted_copy(size_t n,
                                    int sz,
                                    int stride,
                                    const double* src,
                                    double** dest) {
    // src is soa and NOT permuted: identical to the permuted copy with the identity
    soa2aos_inverse_permute_copy(n, sz, stride, src, dest, nullptr);
}

/** @brief AoS mechanism data copied to AoS data.
 *  dest is an array of n pointers to the beginning of each sz length array.
 *  src is a contiguous array of n segments of size sz.
 */
static void aos2aos_copy(size_t n, int sz, const double* src, double** dest) {
    for (size_t instance = 0; instance < n; ++instance) {
        const double* s = src + (instance * sz);
        std::copy(s, s + sz, dest[instance]);
    }
}
aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } (*core2nrn_corepointer_mech_)(tid, type, icnt, dcnt, iArray.get(), dArray.get()); } /** @brief Copy event queue and related state back to NEURON. */ static void core2nrn_tqueue(NrnThread&); /** @brief Callback to clear NEURON thread queues. In particular need to initialize bin queues to the current time before transferring events. */ extern "C" { void (*core2nrn_clear_queues_)(double t); } /** @brief All activated WATCH statements need activation on NEURON side. */ // vector in unpermuted Memb_list index order of vector of // activated watch_index (the bool is whether it is above threshold). using Core2NrnWatchInfoItem = std::vector>; using Core2NrnWatchInfo = std::vector; extern "C" { void (*core2nrn_watch_clear_)(); void (*core2nrn_watch_activate_)(int tid, int type, int watch_begin, Core2NrnWatchInfo&); } static void core2nrn_watch(); /** @brief VecPlay indices back to NEURON */ extern "C" { void (*core2nrn_vecplay_)(int tid, int i_nrn, int last, int discon, int ubound); void (*core2nrn_vecplay_events_)(); } static void core2nrn_vecplay(); /** @brief copy data back to NEURON. * Copies t, voltage, i_membrane_ if it used, and mechanism param data. * Copies event queue and related state, e.g. WATCH, VecPlayContinuous. 
*/ void core2nrn_data_return() { if (!nrn2core_type_return_) { return; } (*core2nrn_clear_queues_)(nrn_threads[0]._t); // all threads at same time for (int tid = 0; tid < nrn_nthread; ++tid) { size_t n = 0; double* data = nullptr; double** mdata = nullptr; NrnThread& nt = nrn_threads[tid]; n = (*nrn2core_type_return_)(0, tid, data, mdata); // 0 means time if (n) { // not the empty thread data[0] = nt._t; } if (nt.end) { // transfer voltage and possibly i_membrane_ n = (*nrn2core_type_return_)(voltage, tid, data, mdata); assert(n == size_t(nt.end) && data); inverse_permute_copy(n, nt._actual_v, data, nt._permute); if (nt.nrn_fast_imem) { n = (*nrn2core_type_return_)(i_membrane_, tid, data, mdata); assert(n == size_t(nt.end) && data); inverse_permute_copy(n, nt.nrn_fast_imem->nrn_sav_rhs, data, nt._permute); } } for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) { int mtype = tml->index; Memb_list* ml = tml->ml; n = (*nrn2core_type_return_)(mtype, tid, data, mdata); assert(n == size_t(ml->nodecount) && mdata); if (n == 0) { continue; } // NEURON is AoS, CoreNEURON may be SoA and may be permuted. // On the NEURON side, the data is actually contiguous because of // cache_efficient, but that may not be the case for ARTIFICIAL_CELL. // For initial implementation simplicity, use the mdata info which gives // a double* for each param_size mech instance. int* permute = ml->_permute; double* cndat = ml->data; int layout = corenrn.get_mech_data_layout()[mtype]; int sz = corenrn.get_prop_param_size()[mtype]; if (layout == Layout::SoA) { int stride = ml->_nodecount_padded; if (permute) { soa2aos_inverse_permute_copy(n, sz, stride, cndat, mdata, permute); } else { soa2aos_unpermuted_copy(n, sz, stride, cndat, mdata); } } else { /* AoS */ aos2aos_copy(n, sz, cndat, mdata); } core2nrn_corepointer(tid, tml); } // Copy the event queue and related state. core2nrn_tqueue(nt); } core2nrn_vecplay(); core2nrn_watch(); } /** @brief Callbacks into NEURON for WatchCondition. 
*/ static void core2nrn_watch() { (*core2nrn_watch_clear_)(); // much of the following nested iterations follows the // watch_activate_clear() function in sim/finitialize.cpp, though here // we iterate over nt._watch_types instead of nt.tml and then picking out // the WATCH relevant types with corenrn.get_watch_check(). for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread& nt = nrn_threads[tid]; if (nt._watch_types) { for (int i = 0; nt._watch_types[i] != 0; ++i) { int type = nt._watch_types[i]; Memb_list& ml = *(nt._ml_list[type]); int nodecount = ml.nodecount; Core2NrnWatchInfo watch_info(ml.nodecount); int* permute = ml._permute; int* pdata = (int*) ml.pdata; int dparam_size = corenrn.get_prop_dparam_size()[type]; int layout = corenrn.get_mech_data_layout()[type]; int first, last; watch_datum_indices(type, first, last); int watch_begin = first; for (int iml = 0; iml < nodecount; ++iml) { int iml_permute = permute ? permute[iml] : iml; Core2NrnWatchInfoItem& wiv = watch_info[iml]; for (int ix = first; ix <= last; ++ix) { int datum = pdata[nrn_i_layout(iml_permute, nodecount, ix, dparam_size, layout)]; if (datum & 2) { // activated bool above_thresh = bool(datum & 1); wiv.push_back(std::pair(ix, above_thresh)); } } } (*core2nrn_watch_activate_)(tid, type, watch_begin, watch_info); } } } } /** @brief Transfer VecPlay indices to NEURON. */ void core2nrn_vecplay() { for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread& nt = nrn_threads[tid]; std::vector i_nrn; int ok = (*nrn2core_get_dat2_vecplay_)(tid, i_nrn); if (nt.n_vecplay) { assert(ok); } for (int i = 0; i < nt.n_vecplay; ++i) { VecPlayContinuous& vp = *((VecPlayContinuous*) nt._vecplay[i]); (*core2nrn_vecplay_)(tid, i_nrn[i], (int) vp.last_index_, (int) vp.discon_index_, (int) vp.ubound_index_); } } (*core2nrn_vecplay_events_)(); } /** @brief Callbacks into NEURON for queue event types. 
*/
extern "C" {
// NetCon event: the NetCon is identified by its index in nt.netcons.
void (*core2nrn_NetCon_event_)(int tid, double td, size_t nc_index);

// must calculate netcon index from the weight index on this side
void (*core2nrn_SelfEvent_event_)(int tid,
                                  double td,
                                  int tar_type,
                                  int tar_index,
                                  double flag,
                                  size_t nc_index,
                                  int is_movable);
// the no weight case
void (*core2nrn_SelfEvent_event_noweight_)(int tid,
                                           double td,
                                           int tar_type,
                                           int tar_index,
                                           double flag,
                                           int is_movable);

// PreSyn.flag_ will be 1 if it has fired and the value it is watching
// is still greater than threshold. (Note, is 0 no matter what after
// finitialize so using a set to send back the flag explicitly for any
// that are 1. Although that is not really relevant in the core2nrn
// direction. To match up PreSyn on NEURON and CoreNEURON side, we use
// the (unpermuted) voltage index.
void (*core2nrn_PreSyn_flag_)(int tid, std::set presyns_flag_true);
// Receive the PreSyn.flag_ == true voltage indices from the neuron side.
void (*nrn2core_transfer_PreSyn_flag_)(int tid, std::set& presyns_flag_true);
}

/** @brief Send to NEURON the unpermuted voltage indices of all PreSyn of nt whose flag_ is set.
 *
 *  Only PreSyn with thvar_index_ >= 0 are considered. When nt._permute exists the
 *  index is mapped back through the inverse permutation.
 */
static void core2nrn_PreSyn_flag(NrnThread& nt) {
    std::set presyns_flag_true;
    // the inverse permutation is only built when the thread is permuted
    std::unique_ptr pinv_nt;
    if (nt._permute) {
        pinv_nt.reset(inverse_permute(nt._permute, nt.end));
    }
    for (int i = 0; i < nt.n_presyn; ++i) {
        PreSyn& ps = nt.presyns[i];
        PreSynHelper& psh = nt.presyns_helper[i];
        if (psh.flag_ && ps.thvar_index_ >= 0) {
            int index_v = pinv_nt ? pinv_nt[ps.thvar_index_] : ps.thvar_index_;
            presyns_flag_true.insert(index_v);
        }
    }
    // have to send even if empty so NEURON side can turn off all flag_
    (*core2nrn_PreSyn_flag_)(nt.id, presyns_flag_true);
}

/** @brief Reset every PreSynHelper::flag_ of thread tid, then set it again for the
 *  PreSyn whose unpermuted voltage index is reported by nrn2core_transfer_PreSyn_flag_.
 */
void nrn2core_PreSyn_flag_receive(int tid) {
    NrnThread& nt = nrn_threads[tid];
    // turn off all the PreSyn.flag_ as they might have been turned off
    // on the NEURON side if NEURON integrated a bit.
    for (int i = 0; i < nt.n_presyn; ++i) {
        nt.presyns_helper[i].flag_ = 0;  // in case 1 from previous psolve
    }
    std::set presyns_flag_true;
    (*nrn2core_transfer_PreSyn_flag_)(tid, presyns_flag_true);
    if (presyns_flag_true.empty()) {
        return;
    }
    std::unique_ptr pinv_nt;
    if (nt._permute) {
        pinv_nt.reset(inverse_permute(nt._permute, nt.end));
    }
    for (int i = 0; i < nt.n_presyn; ++i) {
        PreSyn& ps = nt.presyns[i];
        PreSynHelper& psh = nt.presyns_helper[i];
        if (ps.thvar_index_ >= 0) {
            int index_v = pinv_nt ? pinv_nt[ps.thvar_index_] : ps.thvar_index_;
            // erase() returns the number of removed keys, i.e. non-zero when index_v was listed
            if (presyns_flag_true.erase(index_v)) {
                psh.flag_ = 1;
                // stop scanning as soon as every received index has been matched
                if (presyns_flag_true.empty()) {
                    break;
                }
            }
        }
    }
}

// Inverse permutation arrays per mechanism type, filled on demand by
// core2nrn_tqueue_item and released by clear_inv_perm_for_selfevent_targets.
std::map type2invperm;

/** @brief delete[] every cached inverse permutation and empty type2invperm. */
static void clear_inv_perm_for_selfevent_targets() {
    for (auto it: type2invperm) {
        delete[] it.second;
    }
    type2invperm.clear();
}

// weight index -> queue items of the SelfEvents that carry that weight index
using SelfEventWeightMap = std::map>;

/** @brief Forward one queue item to NEURON, or defer it into sewm.
 *
 *  NetCon events and SelfEvents without weight (weight_index_ < 0) are sent
 *  immediately; SelfEvents with a weight index are appended to sewm so that the
 *  caller can resolve the NetCon index later. A SelfEvent that is sent immediately
 *  is deleted here; the TQItem itself is never deleted by this function.
 */
// return false unless q is pushed to sewm
static bool core2nrn_tqueue_item(TQItem* q, SelfEventWeightMap& sewm, NrnThread& nt) {
    DiscreteEvent* d = (DiscreteEvent*) q->data_;
    double td = q->t_;
    bool in_sewm = false;

    switch (d->type()) {
    case NetConType: {
        NetCon* nc = (NetCon*) d;
        assert(nc >= nt.netcons && (nc < (nt.netcons + nt.n_netcon)));
        // the NetCon is identified by its offset within nt.netcons
        size_t nc_index = nc - nt.netcons;
        (*core2nrn_NetCon_event_)(nt.id, td, nc_index);
        break;
    }
    case SelfEventType: {
        SelfEvent* se = (SelfEvent*) d;
        Point_process* pnt = se->target_;
        assert(pnt->_tid == nt.id);
        int tar_type = (int) pnt->_type;
        Memb_list* ml = nt._ml_list[tar_type];
        if (ml->_permute) {
            // if permutation, then make inverse available
            // Doing this here because we don't know, in general, which
            // mechanisms use SelfEvent
            if (type2invperm.count(tar_type) == 0) {
                type2invperm[tar_type] = inverse_permute(ml->_permute, ml->nodecount);
            }
        }
        double flag = se->flag_;
        TQItem** movable = (TQItem**) (se->movable_);
        // movable only if the target's movable slot currently refers to this very item
        int is_movable = (movable && *movable == q) ? 1 : 0;
        int weight_index = se->weight_index_;
        // the weight_index is useless on the NEURON side so we need
        // to convert that to NetCon index and let the NEURON side
        // figure out the weight_index. To figure out the netcon_index
        // construct a {weight_index : [TQItem]} here for any
        // weight_index >= 0, otherwise send it NEURON now.
        if (weight_index >= 0) {
            // Potentially several SelfEvent TQItem* associated with
            // same weight index. More importantly, collect them all
            // so that we only need to iterate over the nt.netcons once
            sewm[weight_index].push_back(q);
            in_sewm = true;
        } else {
            int tar_index = pnt->_i_instance;  // correct for no permutation
            if (ml->_permute) {
                tar_index = type2invperm[tar_type][tar_index];
            }
            (*core2nrn_SelfEvent_event_noweight_)(
                nt.id, td, tar_type, tar_index, flag, is_movable);
            delete se;
        }
        break;
    }
    case PreSynType: {
        // nothing to transfer
        // `d` can be cast to PreSyn*
        break;
    }
    case NetParEventType: {
        // nothing to transfer
        break;
    }
    case PlayRecordEventType: {
        // nothing to transfer
        break;
    }
    default: {
        // In particular, InputPreSyn does not appear in tqueue as it
        // immediately fans out to NetCon.
        std::stringstream qetype;
        qetype << d->type();
        hoc_execerror("core2nrn_tqueue_item -> unimplemented queue event type:",
                      qetype.str().c_str());
        break;
    }
    }
    return in_sewm;
}

/** @brief Transfer the PreSyn flags and the pending queue events of thread nt to NEURON.
 *
 *  Items obtained from atomic_dq are deleted after being sent; items deferred into
 *  the SelfEventWeightMap are sent (and deleted together with their SelfEvent) once
 *  the owning NetCon index has been found by scanning nt.netcons.
 */
void core2nrn_tqueue(NrnThread& nt) {
    // VecPlayContinuous

    // PatternStim

    // nrn_checkpoint.cpp has:
    // Avoid extra spikes due to some presyn voltages above threshold

    // PreSyn.flag_ that are on
    core2nrn_PreSyn_flag(nt);

    // The items on the queue
    NetCvodeThreadData& ntd = net_cvode_instance->p[nt.id];
    // make sure all buffered interthread events are on the queue
    ntd.enqueue(net_cvode_instance, &nt);

    TQueue* tqe = ntd.tqe_;
    TQItem* q;
    SelfEventWeightMap sewm;
    // TQItems from atomic_dq
    while ((q = tqe->atomic_dq(1e20)) != nullptr) {
        // deferred items stay alive until the NetCon scan below has sent them
        if (core2nrn_tqueue_item(q, sewm, nt) == false) {
            delete q;
        }
    }
    // TQitems from binq_
    for (q = tqe->binq_->first(); q; q = tqe->binq_->next(q)) {
        bool const result = core2nrn_tqueue_item(q, sewm, nt);
        // binq_ items are not deleted here, so none of them may end up deferred
        assert(result == false);
    }

    // For self events with weight, find the NetCon index and send that
    // to NEURON.
    // If the SelfEventWeightMap approach (and the corresponding pattern
    // on the nrn2core side in NEURON) ends up being too expensive in space
    // or time, it would be possible to modify SelfEvent to use the NetCon
    // index instead of the weight index, and then directly determine the
    // NetCon within the core2nrn_tqueue_item function above and call
    // (*core2nrn_SelfEvent_event_) from there.
    if (!sewm.empty()) {
        // NOTE(review): entries are not removed from sewm after being sent, so this
        // presumably relies on weight_index_ being unique per NetCon -- confirm.
        for (int nc_index = 0; nc_index < nt.n_netcon; ++nc_index) {
            NetCon& nc = nt.netcons[nc_index];
            int weight_index = nc.u.weight_index_;
            auto search = sewm.find(weight_index);
            if (search != sewm.end()) {
                const auto& tqitems = search->second;
                for (auto q: tqitems) {
                    DiscreteEvent* d = (DiscreteEvent*) (q->data_);
                    double td = q->t_;
                    assert(d->type() == SelfEventType);
                    SelfEvent* se = (SelfEvent*) d;
                    int tar_type = se->target_->_type;
                    // Note that instead of getting tar_index from the permuted
                    // pnt->_i_instance here and for the noweight case above
                    // which then needs the possibly large inverse permutation
                    // vectors, it would save some space to use the unpermuted
                    // nt.pntprocs array along with a much shorter vector
                    // of type offsets.
                    int tar_index = se->target_->_i_instance;
                    if (nt._ml_list[tar_type]->_permute) {
                        tar_index = type2invperm[tar_type][tar_index];
                    }
                    double flag = se->flag_;
                    TQItem** movable = (TQItem**) (se->movable_);
                    int is_movable = (movable && *movable == q) ? 1 : 0;
                    (*core2nrn_SelfEvent_event_)(
                        nt.id, td, tar_type, tar_index, flag, nc_index, is_movable);
                    delete q;
                    delete se;
                }
            }
        }
    }

    // the inverse permutations cached while sending SelfEvents are no longer needed
    clear_inv_perm_for_selfevent_targets();
}

}  // namespace coreneuron

================================================
FILE: coreneuron/io/core2nrn_data_return.hpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/

#pragma once

namespace coreneuron {

/** @brief Copies back to NEURON everything needed to analyze and continue simulation.
    I.e. voltage, i_membrane_, mechanism data, event queue, WATCH state, Play state, etc.
*/ extern void core2nrn_data_return(); /** @brief return first and last datum indices of WATCH statements */ extern void watch_datum_indices(int type, int& first, int& last); } // namespace coreneuron ================================================ FILE: coreneuron/io/file_utils.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include #include #include #include #include #if defined(MINGW) #define mkdir(dir_name, permission) _mkdir(dir_name) #endif /* adapted from : gist@jonathonreinhart/mkdir_p.c */ int mkdir_p(const char* path) { const int path_len = strlen(path); if (path_len == 0) { printf("Warning: Empty path for creating directory"); return -1; } char* dirpath = new char[path_len + 1]; strcpy(dirpath, path); errno = 0; /* iterate from outer upto inner dir */ for (char* p = dirpath + 1; *p; p++) { if (*p == '/') { /* temporarily truncate to sub-dir */ *p = '\0'; if (mkdir(dirpath, S_IRWXU) != 0) { if (errno != EEXIST) return -1; } *p = '/'; } } if (mkdir(dirpath, S_IRWXU) != 0) { if (errno != EEXIST) { return -1; } } delete[] dirpath; return 0; } ================================================ FILE: coreneuron/io/file_utils.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ /** * @file file_utils.h * @brief Utility functions for file/directory management * */ #pragma once /** @brief Creates directory if doesn't exisit (similar to mkdir -p) * @param Directory path * @return Status */ int mkdir_p(const char* path); ================================================ FILE: coreneuron/io/global_vars.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include #include #include #include #include #include "coreneuron/utils/randoms/nrnran123.h" #include "coreneuron/nrnconf.h" #include "coreneuron/mechanism/membfunc.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/utils/nrnoc_aux.hpp" void* (*nrn2core_get_global_dbl_item_)(void*, const char*& name, int& size, double*& val); int (*nrn2core_get_global_int_item_)(const char* name); namespace coreneuron { using PSD = std::pair; using N2V = std::map; static N2V* n2v; void hoc_register_var(DoubScal* ds, DoubVec* dv, VoidFunc*) { if (!n2v) { n2v = new N2V(); } for (size_t i = 0; ds[i].name; ++i) { (*n2v)[ds[i].name] = PSD(0, ds[i].pdoub); } for (size_t i = 0; dv[i].name; ++i) { (*n2v)[dv[i].name] = PSD(dv[i].index1, ds[i].pdoub); } } void set_globals(const char* path, bool cli_global_seed, int cli_global_seed_value) { if (!n2v) { n2v = new N2V(); } (*n2v)["celsius"] = PSD(0, &celsius); (*n2v)["dt"] = PSD(0, &dt); (*n2v)["t"] = PSD(0, &t); (*n2v)["PI"] = PSD(0, &pi); if (corenrn_embedded) { // CoreNEURON embedded, get info direct from NEURON const char* name; int size; double* val = nullptr; void* p = nullptr; while (1) { p = (*nrn2core_get_global_dbl_item_)(p, name, size, val); // If the last item in 
the NEURON symbol table is a USERDOUBLE // then p is NULL but val is not NULL and following fragment // will be processed before exit from loop. if (val) { N2V::iterator it = n2v->find(name); if (it != n2v->end()) { if (size == 0) { nrn_assert(it->second.first == 0); *(it->second.second) = val[0]; } else { nrn_assert(it->second.first == (size_t) size); double* pval = it->second.second; for (int i = 0; i < size; ++i) { pval[i] = val[i]; } } } delete[] val; val = nullptr; } if (!p) { break; } } secondorder = (*nrn2core_get_global_int_item_)("secondorder"); nrnran123_set_globalindex((*nrn2core_get_global_int_item_)("Random123_global_index")); } else { // get the info from the globals.dat file std::string fname = std::string(path) + std::string("/globals.dat"); FILE* f = fopen(fname.c_str(), "r"); if (!f) { printf("ignore: could not open %s\n", fname.c_str()); delete n2v; n2v = nullptr; return; } char line[256]; nrn_assert(fscanf(f, "%s\n", line) == 1); check_bbcore_write_version(line); for (;;) { char name[256]; double val; int n; nrn_assert(fgets(line, 256, f) != nullptr); N2V::iterator it; if (sscanf(line, "%s %lf", name, &val) == 2) { if (strcmp(name, "0") == 0) { break; } it = n2v->find(name); if (it != n2v->end()) { nrn_assert(it->second.first == 0); *(it->second.second) = val; } } else if (sscanf(line, "%[^[][%d]\n", name, &n) == 2) { if (strcmp(name, "0") == 0) { break; } it = n2v->find(name); if (it != n2v->end()) { nrn_assert(it->second.first == (size_t) n); double* pval = it->second.second; for (int i = 0; i < n; ++i) { nrn_assert(fgets(line, 256, f) != nullptr); nrn_assert(sscanf(line, "%lf\n", &val) == 1); pval[i] = val; } } } else { nrn_assert(0); } } while (fgets(line, 256, f)) { char name[256]; int n; if (sscanf(line, "%s %d", name, &n) == 2) { if (strcmp(name, "secondorder") == 0) { secondorder = n; } else if (strcmp(name, "Random123_globalindex") == 0) { nrnran123_set_globalindex((uint32_t) n); } else if (strcmp(name, "_nrnunit_use_legacy_") == 0) { 
if (n != CORENEURON_USE_LEGACY_UNITS) { hoc_execerror( "CORENRN_ENABLE_LEGACY_UNITS not" " consistent with NEURON value of" " nrnunit_use_legacy()", nullptr); } } } } fclose(f); // overwrite global.dat config if seed is specified on Command line if (cli_global_seed) { nrnran123_set_globalindex((uint32_t) cli_global_seed_value); } } #if CORENRN_DEBUG for (const auto& item: *n2v) { printf("%s %ld %p\n", item.first.c_str(), item.second.first, item.second.second); } #endif delete n2v; n2v = nullptr; } } // namespace coreneuron ================================================ FILE: coreneuron/io/lfp.cpp ================================================ #include "coreneuron/io/lfp.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" #include #include #include namespace coreneuron { namespace lfputils { double line_source_lfp_factor(const Point3D& e_pos, const Point3D& seg_0, const Point3D& seg_1, const double radius, const double f) { nrn_assert(radius >= 0.0); Point3D dx = paxpy(seg_1, -1.0, seg_0); Point3D de = paxpy(e_pos, -1.0, seg_0); double dx2(dot(dx, dx)); double dxn(std::sqrt(dx2)); if (dxn < std::numeric_limits::epsilon()) { return point_source_lfp_factor(e_pos, seg_0, radius, f); } double de2(dot(de, de)); double mu(dot(dx, de) / dx2); Point3D de_star(paxpy(de, -mu, dx)); double de_star2(dot(de_star, de_star)); double q2(de_star2 / dx2); double delta(mu * mu - (de2 - radius * radius) / dx2); double one_m_mu(1.0 - mu); auto log_integral = [&q2, &dxn](double a, double b) { if (q2 < std::numeric_limits::epsilon()) { if (a * b <= 0) { std::ostringstream s; s << "Log integral: invalid arguments " << b << " " << a << ". 
Likely electrode exactly on the segment and " << "no flooring is present."; throw std::invalid_argument(s.str()); } return std::abs(std::log(b / a)) / dxn; } else { return std::log((b + std::sqrt(b * b + q2)) / (a + std::sqrt(a * a + q2))) / dxn; } }; if (delta <= 0.0) { return f * log_integral(-mu, one_m_mu); } else { double sqr_delta(std::sqrt(delta)); double d1(mu - sqr_delta); double d2(mu + sqr_delta); double parts = 0.0; if (d1 > 0.0) { double b(std::min(d1, 1.0) - mu); parts += log_integral(-mu, b); } if (d2 < 1.0) { double b(std::max(d2, 0.0) - mu); parts += log_integral(b, one_m_mu); }; // complement double maxd1_0(std::max(d1, 0.0)), mind2_1(std::min(d2, 1.0)); if (maxd1_0 < mind2_1) { parts += 1.0 / radius * (mind2_1 - maxd1_0); } return f * parts; }; } } // namespace lfputils using namespace lfputils; template LFPCalculator::LFPCalculator(const Point3Ds& seg_start, const Point3Ds& seg_end, const std::vector& radius, const std::vector& segment_ids, const Point3Ds& electrodes, double extra_cellular_conductivity) : segment_ids_(segment_ids) { if (seg_start.size() != seg_end.size()) { throw std::invalid_argument("Different number of segment starts and ends."); } if (seg_start.size() != radius.size()) { throw std::invalid_argument("Different number of segments and radii."); } double f(1.0 / (extra_cellular_conductivity * 4.0 * pi)); m.resize(electrodes.size()); for (size_t k = 0; k < electrodes.size(); ++k) { auto& ms = m[k]; ms.resize(seg_start.size()); for (size_t l = 0; l < seg_start.size(); l++) { ms[l] = getFactor(electrodes[k], seg_start[l], seg_end[l], radius[l], f); } } } template template inline void LFPCalculator::lfp(const Vector& membrane_current) { std::vector res(m.size()); for (size_t k = 0; k < m.size(); ++k) { res[k] = 0.0; auto& ms = m[k]; for (size_t l = 0; l < ms.size(); l++) { res[k] += ms[l] * membrane_current[segment_ids_[l]]; } } #if NRNMPI if (corenrn_param.mpi_enable) { lfp_values_.resize(res.size()); int mpi_sum{1}; 
nrnmpi_dbl_allreduce_vec(res.data(), lfp_values_.data(), res.size(), mpi_sum); } else #endif { std::swap(res, lfp_values_); } } template LFPCalculator::LFPCalculator(const lfputils::Point3Ds& seg_start, const lfputils::Point3Ds& seg_end, const std::vector& radius, const std::vector& segment_ids, const lfputils::Point3Ds& electrodes, double extra_cellular_conductivity); template LFPCalculator::LFPCalculator(const lfputils::Point3Ds& seg_start, const lfputils::Point3Ds& seg_end, const std::vector& radius, const std::vector& segment_ids, const lfputils::Point3Ds& electrodes, double extra_cellular_conductivity); template void LFPCalculator::lfp(const DoublePtr& membrane_current); template void LFPCalculator::lfp(const DoublePtr& membrane_current); template void LFPCalculator::lfp(const std::vector& membrane_current); template void LFPCalculator::lfp(const std::vector& membrane_current); } // namespace coreneuron ================================================ FILE: coreneuron/io/lfp.hpp ================================================ #pragma once #include #include #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/nrnconf.h" #include "coreneuron/utils/nrn_assert.h" namespace coreneuron { namespace lfputils { using Point3D = std::array; using Point3Ds = std::vector; using DoublePtr = double*; inline double dot(const Point3D& p1, const Point3D& p2) { return p1[0] * p2[0] + p1[1] * p2[1] + p1[2] * p2[2]; } inline double norm(const Point3D& p1) { return std::sqrt(dot(p1, p1)); } inline Point3D barycenter(const Point3D& p1, const Point3D& p2) { return {0.5 * (p1[0] + p2[0]), 0.5 * (p1[1] + p2[1]), 0.5 * (p1[2] + p2[2])}; } inline Point3D paxpy(const Point3D& p1, const double alpha, const Point3D& p2) { return {p1[0] + alpha * p2[0], p1[1] + alpha * p2[1], p1[2] + alpha * p2[2]}; } /** * * \param e_pos electrode position * \param seg_pos segment position * \param radius segment radius * \param double conductivity factor 1/([4 pi] * [conductivity]) * \return Resistance 
of the medium from the segment to the electrode. */ inline double point_source_lfp_factor(const Point3D& e_pos, const Point3D& seg_pos, const double radius, const double f) { nrn_assert(radius >= 0.0); Point3D es = paxpy(e_pos, -1.0, seg_pos); return f / std::max(norm(es), radius); } /** * * \param e_pos electrode position * \param seg_pos segment position * \param radius segment radius * \param f conductivity factor 1/([4 pi] * [conductivity]) * \return Resistance of the medium from the segment to the electrode. */ double line_source_lfp_factor(const Point3D& e_pos, const Point3D& seg_0, const Point3D& seg_1, const double radius, const double f); } // namespace lfputils enum LFPCalculatorType { LineSource, PointSource }; /** * \brief LFPCalculator allows calculation of LFP given membrane currents. */ template struct LFPCalculator { /** * LFP Calculator constructor * \param seg_start all segments start owned by the proc * \param seg_end all segments end owned by the proc * \param radius fence around the segment. 
Ensures electrode cannot be * arbitrarily close to the segment * \param electrodes positions of the electrodes * \param extra_cellular_conductivity conductivity of the extra-cellular * medium */ LFPCalculator(const lfputils::Point3Ds& seg_start, const lfputils::Point3Ds& seg_end, const std::vector& radius, const std::vector& segment_ids, const lfputils::Point3Ds& electrodes, double extra_cellular_conductivity); template void lfp(const Vector& membrane_current); const std::vector& lfp_values() const noexcept { return lfp_values_; } private: inline double getFactor(const lfputils::Point3D& e_pos, const lfputils::Point3D& seg_0, const lfputils::Point3D& seg_1, const double radius, const double f) const; std::vector lfp_values_; std::vector> m; const std::vector& segment_ids_; }; template <> double LFPCalculator::getFactor(const lfputils::Point3D& e_pos, const lfputils::Point3D& seg_0, const lfputils::Point3D& seg_1, const double radius, const double f) const { return lfputils::line_source_lfp_factor(e_pos, seg_0, seg_1, radius, f); } template <> double LFPCalculator::getFactor(const lfputils::Point3D& e_pos, const lfputils::Point3D& seg_0, const lfputils::Point3D& seg_1, const double radius, const double f) const { return lfputils::point_source_lfp_factor(e_pos, lfputils::barycenter(seg_0, seg_1), radius, f); } extern template LFPCalculator::LFPCalculator(const lfputils::Point3Ds& seg_start, const lfputils::Point3Ds& seg_end, const std::vector& radius, const std::vector& segment_ids, const lfputils::Point3Ds& electrodes, double extra_cellular_conductivity); extern template LFPCalculator::LFPCalculator(const lfputils::Point3Ds& seg_start, const lfputils::Point3Ds& seg_end, const std::vector& radius, const std::vector& segment_ids, const lfputils::Point3Ds& electrodes, double extra_cellular_conductivity); extern template void LFPCalculator::lfp(const lfputils::DoublePtr& membrane_current); extern template void LFPCalculator::lfp(const lfputils::DoublePtr& 
membrane_current); extern template void LFPCalculator::lfp(const std::vector& membrane_current); extern template void LFPCalculator::lfp(const std::vector& membrane_current); } // namespace coreneuron ================================================ FILE: coreneuron/io/mech_report.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include #include #include "coreneuron/coreneuron.hpp" #include "coreneuron/io/nrn_setup.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/apps/corenrn_parameters.hpp" namespace coreneuron { /** display global mechanism count */ void write_mech_report() { /// mechanim count across all gids, local to rank const auto n_memb_func = corenrn.get_memb_funcs().size(); std::vector local_mech_count(n_memb_func, 0); std::vector local_mech_size(n_memb_func, 0); /// each gid record goes on separate row, only check non-empty threads for (int i = 0; i < nrn_nthread; i++) { const auto& nt = nrn_threads[i]; for (auto* tml = nt.tml; tml; tml = tml->next) { const int type = tml->index; const auto& ml = tml->ml; local_mech_count[type] += ml->nodecount; local_mech_size[type] = memb_list_size(tml, true); } } std::vector total_mech_count(n_memb_func); std::vector total_mech_size(n_memb_func); #if NRNMPI if (corenrn_param.mpi_enable) { /// get global sum of all mechanism instances nrnmpi_long_allreduce_vec(&local_mech_count[0], &total_mech_count[0], local_mech_count.size(), 1); nrnmpi_long_allreduce_vec(&local_mech_size[0], &total_mech_size[0], local_mech_size.size(), 1); } else #endif { total_mech_count = local_mech_count; total_mech_size = local_mech_size; } /// print global stats to stdout if (nrnmpi_myid == 0) { printf("\n============== MECHANISMS COUNT AND SIZE BY TYPE 
=============\n"); printf("%4s %20s %10s %25s\n", "Id", "Name", "Count", "Total memory size (KiB)"); for (size_t i = 0; i < total_mech_count.size(); i++) { if (total_mech_count[i] > 0) { printf("%4lu %20s %10ld %25.2lf\n", i, nrn_get_mechname(i), total_mech_count[i], static_cast(total_mech_size[i]) / 1024); } } printf("==============================================================\n"); } } } // namespace coreneuron ================================================ FILE: coreneuron/io/mech_report.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include namespace coreneuron { /// write mechanism counts to stdout void write_mech_report(); } // namespace coreneuron ================================================ FILE: coreneuron/io/mem_layout_util.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include "mem_layout_util.hpp" namespace coreneuron { /// calculate size after padding for specific memory layout // Warning: this function is declared extern in nrniv_decl.h int nrn_soa_padded_size(int cnt, int layout) { return soa_padded_size(cnt, layout); } /// return the new offset considering the byte aligment settings size_t nrn_soa_byte_align(size_t size) { static_assert(NRN_SOA_BYTE_ALIGN % sizeof(double) == 0, "NRN_SOA_BYTE_ALIGN should be a multiple of sizeof(double)"); constexpr size_t dbl_align{NRN_SOA_BYTE_ALIGN / sizeof(double)}; size_t remainder{size % dbl_align}; if (remainder) { size += dbl_align - remainder; } nrn_assert((size * sizeof(double)) % NRN_SOA_BYTE_ALIGN == 0); return size; } int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout) { switch (layout) { case Layout::AoS: return icnt * sz + isz; case Layout::SoA: int padded_cnt = nrn_soa_padded_size(cnt, layout); // may want to factor out to save time return icnt + isz * padded_cnt; } nrn_assert(false); return 0; } // file data is AoS. ie. // organized as cnt array instances of mtype each of size sz. // So input index i refers to i_instance*sz + i_item offset // Return the corresponding SoA index -- taking into account the // alignment requirements. Ie. i_instance + i_item*align_cnt. 
int nrn_param_layout(int i, int mtype, Memb_list* ml) { int layout = corenrn.get_mech_data_layout()[mtype]; switch (layout) { case Layout::AoS: return i; case Layout::SoA: nrn_assert(layout == Layout::SoA); int sz = corenrn.get_prop_param_size()[mtype]; return nrn_i_layout(i / sz, ml->nodecount, i % sz, sz, layout); } nrn_assert(false); return 0; } } // namespace coreneuron ================================================ FILE: coreneuron/io/mem_layout_util.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/coreneuron.hpp" #include "coreneuron/nrniv/nrniv_decl.h" namespace coreneuron { #if !defined(NRN_SOA_PAD) // for layout 0, every range variable array must have a size which // is a multiple of NRN_SOA_PAD doubles #define NRN_SOA_PAD 8 #endif /// return the new offset considering the byte aligment settings size_t nrn_soa_byte_align(size_t i); /// This function return the index in a flat array of a matrix coordinate (icnt, isz). /// The matrix size is (cnt, sz) /// Depending of the layout some padding can be calculated int nrn_i_layout(int icnt, int cnt, int isz, int sz, int layout); // file data is AoS. ie. // organized as cnt array instances of mtype each of size sz. // So input index i refers to i_instance*sz + i_item offset // Return the corresponding SoA index -- taking into account the // alignment requirements. Ie. i_instance + i_item*align_cnt. 
int nrn_param_layout(int i, int mtype, Memb_list* ml); } // namespace coreneuron ================================================ FILE: coreneuron/io/mk_mech.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include #include #include #include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/membrane_definitions.h" #include "coreneuron/mechanism/register_mech.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/mechanism/mech/cfile/cabvars.h" #include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/mechanism//eion.hpp" static char banner[] = "Duke, Yale, and the BlueBrain Project -- Copyright 1984-2020"; namespace coreneuron { extern int nrn_nobanner_; // NB: this should go away extern std::string cnrn_version(); std::map mech2type; extern "C" { void (*nrn2core_mkmech_info_)(std::ostream&); } static void mk_mech(); static void mk_mech(std::istream&); /// Read meta data about the mechanisms and allocate corresponding mechanism management data /// structures void mk_mech(const char* datpath) { if (corenrn_embedded) { // we are embedded in NEURON mk_mech(); return; } { std::string fname = std::string(datpath) + "/bbcore_mech.dat"; std::ifstream fs(fname); if (!fs.good()) { fprintf(stderr, "Error: couldn't find bbcore_mech.dat file in the dataset directory \n"); fprintf(stderr, " Make sure to pass full directory path of dataset using -d DIR or " "--datpath=DIR \n"); } nrn_assert(fs.good()); mk_mech(fs); fs.close(); } } // we are embedded in NEURON, get info as stringstream from nrnbbcore_write.cpp static void mk_mech() { static bool already_called = false; if 
(already_called) { return; } std::stringstream ss; nrn_assert(nrn2core_mkmech_info_); (*nrn2core_mkmech_info_)(ss); mk_mech(ss); already_called = true; } static void mk_mech(std::istream& s) { char version[256]; s >> version; check_bbcore_write_version(version); // printf("reading %s\n", fname); int n = 0; nrn_assert(s >> n); /// Allocate space for mechanism related data structures alloc_mech(n); /// Read all the mechanisms and their meta data for (int i = 2; i < n; ++i) { char mname[100]; int type = 0, pnttype = 0, is_art = 0, is_ion = 0, dsize = 0, pdsize = 0; nrn_assert(s >> mname >> type >> pnttype >> is_art >> is_ion >> dsize >> pdsize); nrn_assert(i == type); #ifdef DEBUG printf("%s %d %d %d %d %d %d\n", mname, type, pnttype, is_art, is_ion, dsize, pdsize); #endif std::string str(mname); corenrn.get_memb_func(type).sym = (Symbol*) strdup(mname); mech2type[str] = type; corenrn.get_pnt_map()[type] = (char) pnttype; corenrn.get_prop_param_size()[type] = dsize; corenrn.get_prop_dparam_size()[type] = pdsize; corenrn.get_is_artificial()[type] = is_art; if (is_ion) { double charge = 0.; nrn_assert(s >> charge); // strip the _ion char iname[100]; strcpy(iname, mname); iname[strlen(iname) - 4] = '\0'; // printf("%s %s\n", mname, iname); ion_reg(iname, charge); } // printf("%s %d %d\n", mname, nrn_get_mechtype(mname), type); } if (nrnmpi_myid < 1 && nrn_nobanner_ == 0) { fprintf(stderr, " \n"); fprintf(stderr, " %s\n", banner); fprintf(stderr, " Version : %s\n", cnrn_version().c_str()); fprintf(stderr, " \n"); fflush(stderr); } /* will have to put this back if any mod file refers to diam */ // register_mech(morph_mech, morph_alloc, (Pfri)0, (Pfri)0, (Pfri)0, (Pfri)0, -1, 0); /// Calling _reg functions for the default mechanisms from the file mech/cfile/cabvars.h for (int i = 0; mechanism[i]; i++) { (*mechanism[i])(); } } /// Get mechanism type by the mechanism name int nrn_get_mechtype(const char* name) { auto mapit = mech2type.find(name); if (mapit == mech2type.end()) 
return -1; // Could not find the mechanism return mapit->second; } const char* nrn_get_mechname(int type) { for (const auto& item: mech2type) { if (type == item.second) { return item.first.c_str(); } } return nullptr; } } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn2core_data_init.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include "coreneuron/nrnconf.h" #include "coreneuron/network/netpar.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/io/mem_layout_util.hpp" // for WATCH use of nrn_i_layout #include "coreneuron/utils/vrecitem.h" #include "coreneuron/io/core2nrn_data_return.hpp" namespace coreneuron { // helper functions defined below. static void nrn2core_tqueue(); static void watch_activate_clear(); static void nrn2core_transfer_watch_condition(int, int, int, int, int); static void vec_play_activate(); static void nrn2core_patstim_share_info(); extern "C" { /** Pointer to function in NEURON that iterates over activated WATCH statements, sending each item to ... **/ void (*nrn2core_transfer_watch_)(void (*cb)(int, int, int, int, int)); } /** All state from NEURON necessary to continue a run. In NEURON direct mode, we desire the exact behavior of ParallelContext.psolve(tstop). I.e. a sequence of such calls with and without intervening calls to h.finitialize(). Most state (structure and data of the substantive model) has been copied from NEURON during nrn_setup. 
Now we need to copy the event queue and set up any other invalid internal structures. I.e basically the nrn_finitialize above but without changing any simulation data. We follow some of the strategy of checkpoint_initialize. **/ void direct_mode_initialize() { dt2thread(-1.); nrn_thread_table_check(); clear_event_queue(); // Reproduce present NEURON WATCH activation // Start from nothing active. watch_activate_clear(); // nrn2core_transfer_watch_condition(...) receives the WATCH activation info // on a per active WatchCondition basis from NEURON. (*nrn2core_transfer_watch_)(nrn2core_transfer_watch_condition); nrn_spike_exchange_init(); // the things done by checkpoint restore at the end of Phase2::read_file // vec_play_continuous n_vec_play_continuous of them // patstim_index // preSynConditionEventFlags nt.n_presyn of them // restore_events // restore_events // the things done for checkpoint at the end of Phase2::populate // checkpoint_restore_tqueue // Lastly, if PatternStim exists, needs initialization // checkpoint_restore_patternstim // io/nrn_checkpoint.cpp: write_tqueue contains examples for each // DiscreteEvent type with regard to the information needed for each // subclass from the point of view of CoreNEURON. // E.g. for NetConType_, just netcon_index // The trick, then, is to figure out the CoreNEURON info from the // NEURON queue items and that should be available in passing from // the existing processing of nrncore_write. // activate the vec_play_continuous events defined in phase2 setup. vec_play_activate(); // Any PreSyn.flag_ == 1 on the NEURON side needs to be transferred // or the PreSyn will spuriously fire when psolve starts. 
extern void nrn2core_PreSyn_flag_receive(int tid); for (int tid = 0; tid < nrn_nthread; ++tid) { nrn2core_PreSyn_flag_receive(tid); } nrn2core_patstim_share_info(); nrn2core_tqueue(); } void vec_play_activate() { for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread* nt = nrn_threads + tid; for (int i = 0; i < nt->n_vecplay; ++i) { PlayRecord* pr = (PlayRecord*) nt->_vecplay[i]; assert(pr->type() == VecPlayContinuousType); VecPlayContinuous* vpc = (VecPlayContinuous*) pr; assert(vpc->e_); assert(vpc->discon_indices_ == nullptr); // not implemented vpc->e_->send(vpc->t_[vpc->ubound_index_], net_cvode_instance, nt); } } } } // namespace coreneuron // For direct transfer of event queue information // Must be the same as corresponding struct NrnCoreTransferEvents in NEURON // Do not put this coreneuron version in the coreneuron namespace so that the // function pointer/callback has the same type in both NEURON and CoreNEURON. // Calling a function through a pointer to a function of different type is // undefined behaviour. struct NrnCoreTransferEvents { std::vector type; // DiscreteEvent type std::vector td; // delivery time std::vector intdata; // ints specific to the DiscreteEvent type std::vector dbldata; // doubles specific to the type. 
}; namespace coreneuron { extern "C" { /** Pointer to function in NEURON that iterates over its tqeueue **/ NrnCoreTransferEvents* (*nrn2core_transfer_tqueue_)(int tid); } // for faster determination of the movable index given the type static std::unordered_map type2movable; static void setup_type2semantics() { if (type2movable.empty()) { std::size_t const n_memb_func{corenrn.get_memb_funcs().size()}; for (std::size_t type = 0; type < n_memb_func; ++type) { int* ds{corenrn.get_memb_func(type).dparam_semantics}; if (ds) { int dparam_size = corenrn.get_prop_dparam_size()[type]; for (int psz = 0; psz < dparam_size; ++psz) { if (ds[psz] == -4) { // netsend semantics type2movable[type] = psz; } } } } } } /** Copy each thread's queue from NEURON **/ static void nrn2core_tqueue() { setup_type2semantics(); // need type2movable for SelfEvent. for (int tid = 0; tid < nrn_nthread; ++tid) { // should be parallel NrnCoreTransferEvents* ncte = (*nrn2core_transfer_tqueue_)(tid); if (ncte) { size_t idat = 0; size_t idbldat = 0; NrnThread& nt = nrn_threads[tid]; for (size_t i = 0; i < ncte->type.size(); ++i) { switch (ncte->type[i]) { case 0: { // DiscreteEvent // Ignore } break; case 2: { // NetCon int ncindex = ncte->intdata[idat++]; NetCon* nc = nt.netcons + ncindex; #ifndef CORENRN_DEBUG_QUEUE #define CORENRN_DEBUG_QUEUE 0 #endif #if CORENRN_DEBUG_QUEUE printf("nrn2core_tqueue tid=%d i=%zd type=%d tdeliver=%g NetCon %d\n", tid, i, ncte->type[i], ncte->td[i], ncindex); #endif nc->send(ncte->td[i], net_cvode_instance, &nt); } break; case 3: { // SelfEvent // target_type, target_instance, weight_index, flag movable // This is a nightmare and needs to be profoundly re-imagined. // Determine Point_process* int target_type = ncte->intdata[idat++]; int target_instance = ncte->intdata[idat++]; // From target_type and target_instance (mechanism data index) // compute the nt.pntprocs index. 
int offset = nt._pnt_offset[target_type]; Point_process* pnt = nt.pntprocs + offset + target_instance; assert(pnt->_type == target_type); Memb_list* ml = nt._ml_list[target_type]; if (ml->_permute) { target_instance = ml->_permute[target_instance]; } assert(pnt->_i_instance == target_instance); assert(pnt->_tid == tid); // Determine weight_index int netcon_index = ncte->intdata[idat++]; // via the NetCon int weight_index = -1; // no associated netcon if (netcon_index >= 0) { weight_index = nt.netcons[netcon_index].u.weight_index_; } double flag = ncte->dbldata[idbldat++]; int is_movable = ncte->intdata[idat++]; // If the queue item is movable, then the pointer needs to be // stored in the mechanism instance movable slot by net_send. // And don't overwrite if not movable. Only one SelfEvent // for a given target instance is movable. int movable_index = nrn_i_layout(target_instance, ml->nodecount, type2movable[target_type], corenrn.get_prop_dparam_size()[target_type], corenrn.get_mech_data_layout()[target_type]); void** movable_arg = nt._vdata + ml->pdata[movable_index]; TQItem* old_movable_arg = (TQItem*) (*movable_arg); #if CORENRN_DEBUG_QUEUE printf("nrn2core_tqueue tid=%d i=%zd type=%d tdeliver=%g SelfEvent\n", tid, i, ncte->type[i], ncte->td[i]); printf( " target_type=%d pnt data index=%d flag=%g is_movable=%d netcon index " "for weight=%d\n", target_type, target_instance, flag, is_movable, netcon_index); #endif net_send(movable_arg, weight_index, pnt, ncte->td[i], flag); if (!is_movable) { *movable_arg = (void*) old_movable_arg; } } break; case 4: { // PreSyn int type = ncte->intdata[idat++]; if (type == 0) { // CoreNEURON PreSyn int ps_index = ncte->intdata[idat++]; #if CORENRN_DEBUG_QUEUE printf("nrn2core_tqueue tid=%d i=%zd type=%d tdeliver=%g PreSyn %d\n", tid, i, ncte->type[i], ncte->td[i], ps_index); #endif PreSyn* ps = nt.presyns + ps_index; int gid = ps->output_index_; // Following assumes already sent to other machines. 
ps->output_index_ = -1; ps->send(ncte->td[i], net_cvode_instance, &nt); ps->output_index_ = gid; } else { // CoreNEURON InputPreSyn int gid = ncte->intdata[idat++]; InputPreSyn* ps = gid2in[gid]; ps->send(ncte->td[i], net_cvode_instance, &nt); } } break; case 6: { // PlayRecordEvent // Ignore as phase2 handles analogous to checkpoint restore. } break; case 7: { // NetParEvent #if CORENRN_DEBUG_QUEUE printf("nrn2core_tqueue tid=%d i=%zd type=%d tdeliver=%g NetParEvent\n", tid, i, ncte->type[i], ncte->td[i]); #endif } break; default: { std::stringstream qetype; qetype << ncte->type[i]; hoc_execerror("Unimplemented transfer queue event type:", qetype.str().c_str()); } break; } } delete ncte; } } } /** @brief return first and last datum indices of WATCH statements */ void watch_datum_indices(int type, int& first, int& last) { int* semantics = corenrn.get_memb_func(type).dparam_semantics; int dparam_size = corenrn.get_prop_dparam_size()[type]; // which slots are WATCH // Note that first is the WatchList item, not the WatchCondition first = -1; last = 0; for (int i = 0; i < dparam_size; ++i) { if (semantics[i] == -8) { // WATCH if (first == -1) { first = i; } last = i; } } } void watch_activate_clear() { // Can identify mechanisms with WATCH statements from non-NULL // corenrn.get_watch_check()[type] and figure out pdata that are // _watch_array items from corenrn.get_memb_func(type).dparam_semantics // Ironically, all WATCH statements may already be inactivated in // consequence of phase2 transfer. But, for direct mode psolve, we would // eventually like to minimise that transfer (at least with respect to // structure). // Loop over threads, mechanisms and pick out the ones with WATCH statements. for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread& nt = nrn_threads[tid]; for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) { if (corenrn.get_watch_check()[tml->index]) { // zero all the WATCH slots. 
Memb_list* ml = tml->ml; int type = tml->index; int dparam_size = corenrn.get_prop_dparam_size()[type]; // which slots are WATCH int first, last; watch_datum_indices(type, first, last); // Zero the _watch_array from first to last inclusive. // Note: the first is actually unused but is there because NEURON // uses it. There is probably a better way to do this. int* pdata = ml->pdata; int nodecount = ml->nodecount; int layout = corenrn.get_mech_data_layout()[type]; for (int iml = 0; iml < nodecount; ++iml) { for (int i = first; i <= last; ++i) { int* pd = pdata + nrn_i_layout(iml, nodecount, i, dparam_size, layout); *pd = 0; } } } } } } void nrn2core_transfer_watch_condition(int tid, int pnttype, int pntindex, int watch_index, int triggered) { // Note: watch_index relative to AoS _ppvar for instance. NrnThread& nt = nrn_threads[tid]; int pntoffset = nt._pnt_offset[pnttype]; Point_process* pnt = nt.pntprocs + (pntoffset + pntindex); assert(pnt->_type == pnttype); Memb_list* ml = nt._ml_list[pnttype]; if (ml->_permute) { pntindex = ml->_permute[pntindex]; } assert(pnt->_i_instance == pntindex); assert(pnt->_tid == tid); // perhaps all this should be more closely associated with phase2 since // we are really talking about (direct) transfer from NEURON and not able // to rely on finitialize() on the CoreNEURON side which would otherwise // set up all this stuff as a consequence of SelfEvents initiated // and delivered at time 0. // I've become shakey in regard to how this is done since the reorganization // from where everything was done in nrn_setup.cpp. Here, I'm guessing // nrn_i_layout is the relevant index transformation after finding the // beginning of the mechanism pdata. 
int* pdata = ml->pdata; int iml = pntindex; int nodecount = ml->nodecount; int i = watch_index; int dparam_size = corenrn.get_prop_dparam_size()[pnttype]; int layout = corenrn.get_mech_data_layout()[pnttype]; int* pd = pdata + nrn_i_layout(iml, nodecount, i, dparam_size, layout); // activate the WatchCondition *pd = 2 + triggered; } // PatternStim direct mode // NEURON and CoreNEURON had different definitions for struct Info but // the NEURON version of pattern.mod for PatternStim was changed to // adopt the CoreNEURON version (along with THREADSAFE so they have the // same param size). So they now both share the same // instance of Info and NEURON is responsible for constructor/destructor. // And in direct mode, PatternStim gets no special treatment except that // on the CoreNEURON side, the Info struct points to the NEURON instance. // from patstim.mod extern void** pattern_stim_info_ref(int icnt, int cnt, double* _p, Datum* _ppvar, ThreadDatum* _thread, NrnThread* _nt, Memb_list* ml, double v); extern "C" { void (*nrn2core_patternstim_)(void** info); } // In direct mode, CoreNEURON and NEURON share the same PatternStim Info // Assume singleton for PatternStim but that is not really necessary in principle. void nrn2core_patstim_share_info() { int type = nrn_get_mechtype("PatternStim"); NrnThread* nt = nrn_threads + 0; Memb_list* ml = nt->_ml_list[type]; if (ml) { int layout = corenrn.get_mech_data_layout()[type]; int sz = corenrn.get_prop_param_size()[type]; int psz = corenrn.get_prop_dparam_size()[type]; int _cntml = ml->nodecount; assert(ml->nodecount == 1); int _iml = 0; // Assume singleton here and in (*nrn2core_patternstim_)(info) below. 
double* _p = ml->data; Datum* _ppvar = ml->pdata; if (layout == Layout::AoS) { _p += _iml * sz; _ppvar += _iml * psz; } else if (layout == Layout::SoA) { ; } else { assert(0); } void** info = pattern_stim_info_ref(_iml, _cntml, _p, _ppvar, nullptr, nt, ml, 0.0); (*nrn2core_patternstim_)(info); } } } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn2core_direct.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include #include extern "C" { // The callbacks into nrn/src/nrniv/nrnbbcore_write.cpp to get // data directly instead of via files. extern bool corenrn_embedded; extern int corenrn_embedded_nthread; extern void (*nrn2core_group_ids_)(int*); extern void (*nrn2core_mkmech_info_)(std::ostream&); extern void* (*nrn2core_get_global_dbl_item_)(void*, const char*& name, int& size, double*& val); extern int (*nrn2core_get_global_int_item_)(const char* name); extern int (*nrn2core_get_dat1_)(int tid, int& n_presyn, int& n_netcon, int*& output_gid, int*& netcon_srcgid, std::vector& netcon_negsrcgid_tid); extern int (*nrn2core_get_dat2_1_)(int tid, int& n_real_cell, int& ngid, int& n_real_gid, int& nnode, int& ndiam, int& nmech, int*& tml_index, int*& ml_nodecount, int& nidata, int& nvdata, int& nweight); extern int (*nrn2core_get_dat2_2_)(int tid, int*& v_parent_index, double*& a, double*& b, double*& area, double*& v, double*& diamvec); extern int (*nrn2core_get_dat2_mech_)(int tid, size_t i, int dsz_inst, int*& nodeindices, double*& data, int*& pdata, std::vector& pointer2type); extern int (*nrn2core_get_dat2_3_)(int tid, int nweight, int*& output_vindex, double*& output_threshold, int*& netcon_pnttype, int*& netcon_pntindex, double*& weights, 
double*& delays); extern int (*nrn2core_get_dat2_corepointer_)(int tid, int& n); extern int (*nrn2core_get_dat2_corepointer_mech_)(int tid, int type, int& icnt, int& dcnt, int*& iarray, double*& darray); extern int (*nrn2core_get_dat2_vecplay_)(int tid, std::vector& indices); extern int (*nrn2core_get_dat2_vecplay_inst_)(int tid, int i, int& vptype, int& mtype, int& ix, int& sz, double*& yvec, double*& tvec, int& last_index, int& discon_index, int& ubound_index); extern void (*nrn2core_part2_clean_)(); /* what variables to send back to NEURON on each time step */ extern void (*nrn2core_get_trajectory_requests_)(int tid, int& bsize, int& n_pr, void**& vpr, int& n_trajec, int*& types, int*& indices, double**& pvars, double**& varrays); /* send values to NEURON on each time step */ extern void (*nrn2core_trajectory_values_)(int tid, int n_pr, void** vpr, double t); /* Filled the Vector data arrays and send back the sizes at end of run */ extern void ( *nrn2core_trajectory_return_)(int tid, int n_pr, int bsize, int vecsz, void** vpr, double t); /* send all spikes vectors to NEURON */ extern int (*nrn2core_all_spike_vectors_return_)(std::vector& spikevec, std::vector& gidvec); /* send all weights to NEURON */ extern void (*nrn2core_all_weights_return_)(std::vector& weights); /* get data array pointer from NEURON to copy into. */ extern size_t (*nrn2core_type_return_)(int type, int tid, double*& data, double**& mdata); } // extern "C" ================================================ FILE: coreneuron/io/nrn_checkpoint.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include #include #include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/io/nrn_filehandler.hpp" #include "coreneuron/io/nrn_checkpoint.hpp" #include "coreneuron/io/nrn_setup.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/network/netpar.hpp" #include "coreneuron/utils/vrecitem.h" #include "coreneuron/mechanism/mech/mod2c_core_thread.hpp" #include "coreneuron/io/file_utils.hpp" #include "coreneuron/permute/data_layout.hpp" #include "coreneuron/permute/node_permute.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" namespace coreneuron { // Those functions comes from mod file directly extern int checkpoint_save_patternstim(_threadargsproto_); extern void checkpoint_restore_patternstim(int, double, _threadargsproto_); CheckPoints::CheckPoints(const std::string& save, const std::string& restore) : save_(save) , restore_(restore) , restored(false) { if (!save.empty()) { if (nrnmpi_myid == 0) { mkdir_p(save.c_str()); } } } /// todo : need to broadcast this rather than all reading a double double CheckPoints::restore_time() const { if (!should_restore()) { return 0.; } double rtime = 0.; FileHandler f; std::string filename = restore_ + "/time.dat"; f.open(filename, std::ios::in); f.read_array(&rtime, 1); f.close(); return rtime; } void CheckPoints::write_checkpoint(NrnThread* nt, int nb_threads) const { if (!should_save()) { return; } #if NRNMPI if (corenrn_param.mpi_enable) { nrnmpi_barrier(); } #endif /** * if openmp threading needed: * #pragma omp parallel for private(i) shared(nt, nb_threads) schedule(runtime) */ for (int i = 0; i < nb_threads; i++) { if (nt[i].ncell || nt[i].tml) { write_phase2(nt[i]); } } if (nrnmpi_myid == 0) { write_time(); } #if NRNMPI if (corenrn_param.mpi_enable) { nrnmpi_barrier(); } #endif } // Factor out the body of ion handling below as the same code // handles POINTER 
/// Translate `ix`, an index relative to nt._data that points into the data of
/// mechanism type `etype`, into the AoS index (instance * size + item) relative
/// to that mechanism's data, undoing the SoA layout and the instance permutation.
/// `ml_pinv` caches one inverse permutation per mechanism type; a missing entry
/// is created here via inverse_permute and is left for the caller to release.
static int nrn_original_aos_index(int etype, int ix, NrnThread& nt, int** ml_pinv) {
    // Determine ei_instance and ei from etype and ix.
    // Deal with existing permutation and SoA.
    Memb_list* eml = nt._ml_list[etype];
    int ecnt = eml->nodecount;
    int esz = corenrn.get_prop_param_size()[etype];
    int elayout = corenrn.get_mech_data_layout()[etype];
    // current index into eml->data is a function
    // of elayout, eml._permute, ei_instance, ei, and
    // eml padding.
    int p = ix - (eml->data - nt._data);
    assert(p >= 0 && p < eml->_nodecount_padded * esz);
    int ei_instance, ei;
    nrn_inverse_i_layout(p, ei_instance, ecnt, ei, esz, elayout);
    if (elayout == Layout::SoA) {
        if (eml->_permute) {
            if (!ml_pinv[etype]) {
                ml_pinv[etype] = inverse_permute(eml->_permute, eml->nodecount);
            }
            ei_instance = ml_pinv[etype][ei_instance];
        }
    }
    return ei_instance * esz + ei;
}

/// Write the checkpoint file `<save path>/<file_id>_2.dat` for thread `nt`.
/// Sections are written in this order: counts, parent indices, node arrays,
/// per-mechanism data and pdata, PreSyn output info, NetCon info and weights,
/// BBCOREPOINTER data, VecPlay instances, and finally the event queue
/// (write_tqueue). Indices are converted back to non-permuted AoS order.
/// PatternStim (patstimtype) is skipped in the mechanism sections.
void CheckPoints::write_phase2(NrnThread& nt) const {
    FileHandler fh;
    NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id];
    auto filename = get_save_path() + "/" + std::to_string(ntc.file_id) + "_2.dat";

    fh.open(filename, std::ios::out);
    fh.checkpoint(2);

    int n_outputgid = 0;  // calculate PreSyn with gid >= 0
    for (int i = 0; i < nt.n_presyn; ++i) {
        if (nt.presyns[i].gid_ >= 0) {
            ++n_outputgid;
        }
    }

    // --- header counts ---
    fh << nt.ncell << " ncell\n";
    fh << n_outputgid << " ngid\n";
#if CHKPNTDEBUG
    assert(ntc.n_outputgids == n_outputgid);
#endif

    fh << nt.n_real_output << " n_real_output\n";
    fh << nt.end << " nnode\n";
    fh << ((nt._actual_diam == nullptr) ? 0 : nt.end) << " ndiam\n";
    int nmech = 0;
    for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
        if (tml->index != patstimtype) {  // skip PatternStim
            ++nmech;
        }
    }

    fh << nmech << " nmech\n";
#if CHKPNTDEBUG
    assert(nmech == ntc.nmech);
#endif

    // mechanism type and instance count, one pair per mechanism
    for (NrnThreadMembList* current_tml = nt.tml; current_tml; current_tml = current_tml->next) {
        if (current_tml->index == patstimtype) {
            continue;
        }
        fh << current_tml->index << "\n";
        fh << current_tml->ml->nodecount << "\n";
    }

    fh << nt._nidata << " nidata\n";
    fh << nt._nvdata << " nvdata\n";
    fh << nt.n_weight << " nweight\n";

    // --- parent indices; pinv_nt becomes the inverse node permutation
    //     (identity when nodes are not permuted) and is used below ---
    // see comment about parent in node_permute.cpp
    int* pinv_nt = nullptr;
    if (nt._permute) {
        int* d = new int[nt.end];
        pinv_nt = inverse_permute(nt._permute, nt.end);
        for (int i = 0; i < nt.end; ++i) {
            int x = nt._v_parent_index[nt._permute[i]];
            if (x >= 0) {
                d[i] = pinv_nt[x];
            } else {
                d[i] = 0;  // really should be -1;
            }
        }
#if CHKPNTDEBUG
        for (int i = 0; i < nt.end; ++i) {
            assert(d[i] == ntc.parent[i]);
        }
#endif
        fh.write_array(d, nt.end);
        delete[] d;
    } else {
#if CHKPNTDEBUG
        for (int i = 0; i < nt.end; ++i) {
            assert(nt._v_parent_index[i] == ntc.parent[i]);
        }
#endif
        fh.write_array(nt._v_parent_index, nt.end);
        pinv_nt = new int[nt.end];
        for (int i = 0; i < nt.end; ++i) {
            pinv_nt[i] = i;
        }
    }

    // --- per-node arrays ---
    data_write(fh, nt._actual_a, nt.end, 1, 0, nt._permute);
    data_write(fh, nt._actual_b, nt.end, 1, 0, nt._permute);

#if CHKPNTDEBUG
    for (int i = 0; i < nt.end; ++i) {
        assert(nt._actual_area[i] == ntc.area[pinv_nt[i]]);
    }
#endif

    data_write(fh, nt._actual_area, nt.end, 1, 0, nt._permute);
    data_write(fh, nt._actual_v, nt.end, 1, 0, nt._permute);

    if (nt._actual_diam) {
        data_write(fh, nt._actual_diam, nt.end, 1, 0, nt._permute);
    }

    auto& memb_func = corenrn.get_memb_funcs();
    // will need the ml_pinv inverse permutation of ml._permute for ions and POINTER
    // (entries are created on demand and released at the end of this function)
    int** ml_pinv = (int**) ecalloc(memb_func.size(), sizeof(int*));

    // --- per-mechanism node indices, data and pdata ---
    for (NrnThreadMembList* current_tml = nt.tml; current_tml; current_tml = current_tml->next) {
        Memb_list* ml = current_tml->ml;
        int type = current_tml->index;
        if (type == patstimtype) {
            continue;
        }
        int cnt = ml->nodecount;
        auto& nrn_prop_param_size_ = corenrn.get_prop_param_size();
        auto& nrn_prop_dparam_size_ = corenrn.get_prop_dparam_size();
        auto& nrn_is_artificial_ = corenrn.get_is_artificial();

        int sz = nrn_prop_param_size_[type];
        int layout = corenrn.get_mech_data_layout()[type];
        int* semantics = memb_func[type].dparam_semantics;

        if (!nrn_is_artificial_[type]) {
            // ml->nodeindices values are permuted according to nt._permute
            // and locations according to ml._permute
            // i.e. according to comment in node_permute.cpp
            // nodelist[p_m[i]] = p[nodelist_original[i]
            // so pinv[nodelist[p_m[i]] = nodelist_original[i]
            int* nd_ix = new int[cnt];
            for (int i = 0; i < cnt; ++i) {
                int ip = ml->_permute ? ml->_permute[i] : i;
                int ipval = ml->nodeindices[ip];
                nd_ix[i] = pinv_nt[ipval];
            }
            fh.write_array(nd_ix, cnt);
            delete[] nd_ix;
        }

        data_write(fh, ml->data, cnt, sz, layout, ml->_permute);

        // from here on sz is the dparam (pdata) size of the mechanism
        sz = nrn_prop_dparam_size_[type];
        if (sz) {
            // need to update some values according to Datum semantics.
            int* d = soa2aos(ml->pdata, cnt, sz, layout, ml->_permute);
            // NOTE(review): the template argument of the vector below is missing
            // from this listing; push_back(ptype) with an int suggests
            // std::vector<int> -- confirm against the repository.
            std::vector pointer2type;  // voltage or mechanism type (starts empty)
            if (!nrn_is_artificial_[type]) {
                for (int i_instance = 0; i_instance < cnt; ++i_instance) {
                    for (int i = 0; i < sz; ++i) {
                        int ix = i_instance * sz + i;
                        int s = semantics[i];
                        if (s == -1) {  // area
                            int p = pinv_nt[d[ix] - (nt._actual_area - nt._data)];
                            d[ix] = p;  // relative _actual_area
                        } else if (s == -9) {  // diam
                            int p = pinv_nt[d[ix] - (nt._actual_diam - nt._data)];
                            d[ix] = p;  // relative to _actual_diam
                        } else if (s == -5) {  // POINTER
                            // loop over instances, then sz, means that we
                            // visit consistent with natural order of
                            // pointer2type

                            // Relevant code that this has to invert
                            // is permute/node_permute.cpp :: update_pdata_values with
                            // respect to permutation, and
                            // io/phase2.cpp :: Phase2::pdata_relocation
                            // with respect to that AoS -> SoA

                            // Step 1: what mechanism is d[ix] pointing to
                            int ptype = type_of_ntdata(nt, d[ix], i_instance == 0);
                            pointer2type.push_back(ptype);

                            // Step 2: replace d[ix] with AoS index relative to type
                            if (ptype == voltage) {
                                int p = pinv_nt[d[ix] - (nt._actual_v - nt._data)];
                                d[ix] = p;  // relative to _actual_v
                            } else {
                                // Since we know ptype, the situation is
                                // identical to ion below. (which was factored
                                // out into the following function.
                                d[ix] = nrn_original_aos_index(ptype, d[ix], nt, ml_pinv);
                            }
                        } else if (s >= 0 && s < 1000) {  // ion
                            d[ix] = nrn_original_aos_index(s, d[ix], nt, ml_pinv);
                        }
#if CHKPNTDEBUG
                        if (s != -8) {  // WATCH values change
                            assert(d[ix] ==
                                   ntc.mlmap[type]->pdata_not_permuted[i_instance * sz + i]);
                        }
#endif
                    }
                }
            }
            fh.write_array(d, cnt * sz);
            delete[] d;
            size_t s = pointer2type.size();
            fh << s << " npointer\n";
            if (s) {
                fh.write_array(pointer2type.data(), s);
            }
        }
    }

    // --- PreSyn output info: voltage index (or encoded point process) and threshold ---
    int nnetcon = nt.n_netcon;

    int* output_vindex = new int[nt.n_presyn];
    double* output_threshold = new double[nt.n_real_output];
    for (int i = 0; i < nt.n_presyn; ++i) {
        PreSyn* ps = nt.presyns + i;
        if (ps->thvar_index_ >= 0) {
            // real cell and index into (permuted) actual_v
            // if any assert fails in this loop then we have faulty understanding
            // of the for (int i = 0; i < nt.n_presyn; ++i) loop in nrn_setup.cpp
            assert(ps->thvar_index_ < nt.end);
            assert(ps->pntsrc_ == nullptr);
            output_threshold[i] = ps->threshold_;
            output_vindex[i] = pinv_nt[ps->thvar_index_];
        } else if (i < nt.n_real_output) {  // real cell without a presyn
            output_threshold[i] = 0.0;      // the way it was set in nrnbbcore_write.cpp
            output_vindex[i] = -1;
        } else {
            // source is a point process: encode instance and type in one negative value
            Point_process* pnt = ps->pntsrc_;
            assert(pnt);
            int type = pnt->_type;
            int ix = pnt->_i_instance;
            if (nt._ml_list[type]->_permute) {
                // pnt->_i_instance is the permuted index into pnt->_type
                if (!ml_pinv[type]) {
                    Memb_list* ml = nt._ml_list[type];
                    ml_pinv[type] = inverse_permute(ml->_permute, ml->nodecount);
                }
                ix = ml_pinv[type][ix];
            }
            output_vindex[i] = -(ix * 1000 + type);
        }
    }
    fh.write_array(output_vindex, nt.n_presyn);
    fh.write_array(output_threshold, nt.n_real_output);
#if CHKPNTDEBUG
    for (int i = 0; i < nt.n_presyn; ++i) {
        assert(ntc.output_vindex[i] == output_vindex[i]);
    }
    for (int i = 0; i < nt.n_real_output; ++i) {
        assert(ntc.output_threshold[i] == output_threshold[i]);
    }
#endif
    delete[] output_vindex;
    delete[] output_threshold;
    delete[] pinv_nt;

    // --- NetCon targets, weights and delays ---
    // pnt_offset[type] is the index in nt.pntprocs of the first instance of `type`
    int synoffset = 0;
    std::vector pnt_offset(memb_func.size(), -1);
    for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
        int type = tml->index;
        if (corenrn.get_pnt_map()[type] > 0) {
            pnt_offset[type] = synoffset;
            synoffset += tml->ml->nodecount;
        }
    }

    int* pnttype = new int[nnetcon];
    int* pntindex = new int[nnetcon];
    double* delay = new double[nnetcon];
    for (int i = 0; i < nnetcon; ++i) {
        NetCon& nc = nt.netcons[i];
        Point_process* pnt = nc.target_;
        if (pnt == nullptr) {
            // nrn_setup.cpp allows type <=0 which generates nullptr target.
            pnttype[i] = 0;
            pntindex[i] = -1;
        } else {
            pnttype[i] = pnt->_type;

            // todo: this seems most natural, but does not work. Perhaps should look
            // into how pntindex determined in nrnbbcore_write.cpp and change there.
            // int ix = pnt->_i_instance;
            // if (ml_pinv[pnt->_type]) {
            //     ix = ml_pinv[pnt->_type][ix];
            // }

            // follow the inverse of nrn_setup.cpp using pnt_offset computed above.
            int ix = (pnt - nt.pntprocs) - pnt_offset[pnt->_type];
            pntindex[i] = ix;
        }
        delay[i] = nc.delay_;
    }
    fh.write_array(pnttype, nnetcon);
    fh.write_array(pntindex, nnetcon);
    fh.write_array(nt.weights, nt.n_weight);
    fh.write_array(delay, nnetcon);
#if CHKPNTDEBUG
    for (int i = 0; i < nnetcon; ++i) {
        assert(ntc.pnttype[i] == pnttype[i]);
        assert(ntc.pntindex[i] == pntindex[i]);
        assert(ntc.delay[i] == delay[i]);
    }
#endif
    delete[] pnttype;
    delete[] pntindex;
    delete[] delay;

    // BBCOREPOINTER
    int nbcp = 0;
    for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
        if (corenrn.get_bbcore_read()[tml->index] && tml->index != patstimtype) {
            ++nbcp;
        }
    }

    fh << nbcp << " bbcorepointer\n";
#if CHKPNTDEBUG
    assert(nbcp == ntc.nbcp);
#endif
    nbcp = 0;
    for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
        if (corenrn.get_bbcore_read()[tml->index] && tml->index != patstimtype) {
            // i is only read by the CHKPNTDEBUG assertions below
            int i = nbcp++;
            int type = tml->index;
            assert(corenrn.get_bbcore_write()[type]);
            Memb_list* ml = tml->ml;
            double* d = nullptr;
            Datum* pd = nullptr;
            int layout = corenrn.get_mech_data_layout()[type];
            int dsz = corenrn.get_prop_param_size()[type];
            int pdsz = corenrn.get_prop_dparam_size()[type];
            int aln_cntml = nrn_soa_padded_size(ml->nodecount, layout);
            fh << type << "\n";
            int icnt = 0;
            int dcnt = 0;
            // data size and allocate
            // (first pass: the write function is called with null arrays and
            // only icnt/dcnt are collected)
            for (int j = 0; j < ml->nodecount; ++j) {
                int jp = j;
                if (ml->_permute) {
                    jp = ml->_permute[j];
                }
                d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout);
                pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout);
                (*corenrn.get_bbcore_write()[type])(
                    nullptr, nullptr, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0);
            }
            fh << icnt << "\n";
            fh << dcnt << "\n";
#if CHKPNTDEBUG
            assert(ntc.bcptype[i] == type);
            assert(ntc.bcpicnt[i] == icnt);
            assert(ntc.bcpdcnt[i] == dcnt);
#endif
            int* iArray = nullptr;
            double* dArray = nullptr;
            if (icnt) {
                iArray = new int[icnt];
            }
            if (dcnt) {
                dArray = new double[dcnt];
            }
            // second pass: same calls, now with the arrays allocated above
            icnt = dcnt = 0;
            for (int j = 0; j < ml->nodecount; j++) {
                int jp = j;

                if (ml->_permute) {
                    jp = ml->_permute[j];
                }

                d = ml->data + nrn_i_layout(jp, ml->nodecount, 0, dsz, layout);
                pd = ml->pdata + nrn_i_layout(jp, ml->nodecount, 0, pdsz, layout);

                (*corenrn.get_bbcore_write()[type])(
                    dArray, iArray, &dcnt, &icnt, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0);
            }

            if (icnt) {
                fh.write_array(iArray, icnt);
                delete[] iArray;
            }

            if (dcnt) {
                fh.write_array(dArray, dcnt);
                delete[] dArray;
            }
            ++i;
        }
    }

    // --- VecPlay instances ---
    fh << nt.n_vecplay << " VecPlay instances\n";
    for (int i = 0; i < nt.n_vecplay; i++) {
        PlayRecord* pr = (PlayRecord*) nt._vecplay[i];
        int vtype = pr->type();
        int mtype = -1;
        int ix = -1;

        // not as efficient as possible but there should not be too many
        // (find the mechanism whose data range contains the played variable pr->pd_)
        Memb_list* ml = nullptr;
        for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
            ml = tml->ml;
            int nn = corenrn.get_prop_param_size()[tml->index] * ml->nodecount;
            if (nn && pr->pd_ >= ml->data && pr->pd_ < (ml->data + nn)) {
                mtype = tml->index;
                ix = (pr->pd_ - ml->data);
                break;
            }
        }
        assert(mtype >= 0);
        // convert ix to the non-permuted AoS index within the mechanism data
        int icnt, isz;
        nrn_inverse_i_layout(ix,
                             icnt,
                             ml->nodecount,
                             isz,
                             corenrn.get_prop_param_size()[mtype],
                             corenrn.get_mech_data_layout()[mtype]);
        if (ml_pinv[mtype]) {
            icnt = ml_pinv[mtype][icnt];
        }
        ix = nrn_i_layout(
            icnt, ml->nodecount, isz, corenrn.get_prop_param_size()[mtype], AOS_LAYOUT);

        fh << vtype << "\n";
        fh << mtype << "\n";
        fh << ix << "\n";
#if CHKPNTDEBUG
        assert(ntc.vtype[i] == vtype);
        assert(ntc.mtype[i] == mtype);
        assert(ntc.vecplay_ix[i] == ix);
#endif
        if (vtype == VecPlayContinuousType) {
            VecPlayContinuous* vpc = (VecPlayContinuous*) pr;
            int sz = vpc->y_.size();
            fh << sz << "\n";
            fh.write_array(vpc->y_.data(), sz);
            fh.write_array(vpc->t_.data(), sz);
        } else {
            std::cerr << "Error checkpointing vecplay type" << std::endl;
            assert(0);
        }
    }

    // release the inverse permutations created on demand above
    for (size_t i = 0; i < memb_func.size(); ++i) {
        if (ml_pinv[i]) {
            delete[] ml_pinv[i];
        }
    }
    free(ml_pinv);

    write_tqueue(nt, fh);
    fh.close();
}

/// Write the variable `t` to `<save path>/time.dat`.
void CheckPoints::write_time() const {
    FileHandler f;
    auto filename = get_save_path() + "/time.dat";
    f.open(filename, std::ios::out);
    f.write_array(&t, 1);
    f.close();
}

// A call to finitialize must be avoided after restoring the checkpoint
// as that would change all states to a voltage clamp initialization.
// Nevertheless t and some spike exchange and other computer state needs to
// be initialized.
// Also it is occasionally the case that nrn_init allocates data so we
// need to call it but avoid the internal call to initmodel.
// Consult finitialize.c to help decide what should be here bool CheckPoints::initialize() { dt2thread(-1.); nrn_thread_table_check(); nrn_spike_exchange_init(); allocate_data_in_mechanism_nrn_init(); // if PatternStim exists, needs initialization for (NrnThreadMembList* tml = nrn_threads[0].tml; tml; tml = tml->next) { if (tml->index == patstimtype && patstim_index >= 0 && patstim_te > 0.0) { Memb_list* ml = tml->ml; checkpoint_restore_patternstim(patstim_index, patstim_te, /* below correct only for AoS */ 0, ml->nodecount, ml->data, ml->pdata, ml->_thread, nrn_threads, ml, 0.0); break; } } // Check that bbcore_write is defined if we want to use checkpoint for (NrnThreadMembList* tml = nrn_threads[0].tml; tml; tml = tml->next) { auto type = tml->index; if (corenrn.get_bbcore_read()[type] && !corenrn.get_bbcore_write()[type]) { auto memb_func = corenrn.get_memb_func(type); fprintf(stderr, "Checkpoint is requested involving BBCOREPOINTER but there is no bbcore_write" " function for %s\n", memb_func.sym); assert(corenrn.get_bbcore_write()[type]); } } return restored; } template T* CheckPoints::soa2aos(T* data, int cnt, int sz, int layout, int* permute) const { // inverse of F -> data. Just a copy if layout=1. If SoA, // original file order depends on padding and permutation. // Good for a, b, area, v, diam, Memb_list.data, or anywhere values do not change. 
T* d = new T[cnt * sz]; if (layout == Layout::AoS) { for (int i = 0; i < cnt * sz; ++i) { d[i] = data[i]; } } else if (layout == Layout::SoA) { int align_cnt = nrn_soa_padded_size(cnt, layout); for (int i = 0; i < cnt; ++i) { int ip = i; if (permute) { ip = permute[i]; } for (int j = 0; j < sz; ++j) { d[i * sz + j] = data[ip + j * align_cnt]; } } } return d; } template void CheckPoints::data_write(FileHandler& F, T* data, int cnt, int sz, int layout, int* permute) const { T* d = soa2aos(data, cnt, sz, layout, permute); F.write_array(d, cnt * sz); delete[] d; } NrnThreadChkpnt* nrnthread_chkpnt; int patstimtype; void CheckPoints::write_tqueue(TQItem* q, NrnThread& nt, FileHandler& fh) const { DiscreteEvent* d = (DiscreteEvent*) q->data_; // printf(" p %.20g %d\n", q->t_, d->type()); // d->pr("", q->t_, net_cvode_instance); if (!d->require_checkpoint()) { return; } fh << d->type() << "\n"; fh.write_array(&q->t_, 1); switch (d->type()) { case NetConType: { NetCon* nc = (NetCon*) d; assert(nc >= nt.netcons && (nc < (nt.netcons + nt.n_netcon))); fh << (nc - nt.netcons) << "\n"; break; } case SelfEventType: { SelfEvent* se = (SelfEvent*) d; fh << int(se->target_->_type) << "\n"; fh << se->target_ - nt.pntprocs << "\n"; // index of nrnthread.pntprocs fh << se->target_->_i_instance << "\n"; // not needed except for assert check fh.write_array(&se->flag_, 1); fh << (se->movable_ - nt._vdata) << "\n"; // DANGEROUS? 
fh << se->weight_index_ << "\n"; // printf(" %d %ld %d %g %ld %d\n", se->target_->_type, se->target_ - nt.pntprocs, // se->target_->_i_instance, se->flag_, se->movable_ - nt._vdata, se->weight_index_); break; } case PreSynType: { PreSyn* ps = (PreSyn*) d; assert(ps >= nt.presyns && (ps < (nt.presyns + nt.n_presyn))); fh << (ps - nt.presyns) << "\n"; break; } case NetParEventType: { // nothing extra to write break; } case PlayRecordEventType: { PlayRecord* pr = ((PlayRecordEvent*) d)->plr_; fh << pr->type() << "\n"; if (pr->type() == VecPlayContinuousType) { VecPlayContinuous* vpc = (VecPlayContinuous*) pr; int ix = -1; for (int i = 0; i < nt.n_vecplay; ++i) { // if too many for fast search, put ix in the instance if (nt._vecplay[i] == (void*) vpc) { ix = i; break; } } assert(ix >= 0); fh << ix << "\n"; } else { assert(0); } break; } default: { // In particular, InputPreSyn does not appear in tqueue as it // immediately fans out to NetCon. assert(0); break; } } } void CheckPoints::restore_tqitem(int type, std::shared_ptr event, NrnThread& nt) { // printf("restore tqitem type=%d time=%.20g\n", type, time); switch (type) { case NetConType: { auto e = static_cast(event.get()); // printf(" NetCon %d\n", netcon_index); NetCon* nc = nt.netcons + e->netcon_index; nc->send(e->time, net_cvode_instance, &nt); break; } case SelfEventType: { auto e = static_cast(event.get()); if (e->target_type == patstimtype) { if (nt.id == 0) { patstim_te = e->time; } break; } Point_process* pnt = nt.pntprocs + e->point_proc_instance; // printf(" SelfEvent %d %d %d %g %d %d\n", target_type, point_proc_instance, // target_instance, flag, movable, weight_index); nrn_assert(e->target_instance == pnt->_i_instance); nrn_assert(e->target_type == pnt->_type); net_send(nt._vdata + e->movable, e->weight_index, pnt, e->time, e->flag); break; } case PreSynType: { auto e = static_cast(event.get()); // printf(" PreSyn %d\n", presyn_index); PreSyn* ps = nt.presyns + e->presyn_index; int gid = 
ps->output_index_; ps->output_index_ = -1; ps->send(e->time, net_cvode_instance, &nt); ps->output_index_ = gid; break; } case NetParEventType: { // nothing extra to read // printf(" NetParEvent\n"); break; } case PlayRecordEventType: { auto e = static_cast(event.get()); VecPlayContinuous* vpc = (VecPlayContinuous*) (nt._vecplay[e->vecplay_index]); vpc->e_->send(e->time, net_cvode_instance, &nt); break; } default: { assert(0); break; } } } void CheckPoints::write_tqueue(NrnThread& nt, FileHandler& fh) const { // VecPlayContinuous fh << nt.n_vecplay << " VecPlayContinuous state\n"; for (int i = 0; i < nt.n_vecplay; ++i) { VecPlayContinuous* vpc = (VecPlayContinuous*) nt._vecplay[i]; fh << vpc->last_index_ << "\n"; fh << vpc->discon_index_ << "\n"; fh << vpc->ubound_index_ << "\n"; } // PatternStim int patstim_index = -1; for (NrnThreadMembList* tml = nrn_threads[0].tml; tml; tml = tml->next) { if (tml->index == patstimtype) { Memb_list* ml = tml->ml; patstim_index = checkpoint_save_patternstim( /* below correct only for AoS */ 0, ml->nodecount, ml->data, ml->pdata, ml->_thread, nrn_threads, ml, 0.0); break; } } fh << patstim_index << " PatternStim\n"; // Avoid extra spikes due to some presyn voltages above threshold fh << -1 << " Presyn ConditionEvent flags\n"; for (int i = 0; i < nt.n_presyn; ++i) { // PreSyn.flag_ not used. 
HPC memory utilizes PreSynHelper.flag_ array fh << nt.presyns_helper[i].flag_ << "\n"; } NetCvodeThreadData& ntd = net_cvode_instance->p[nt.id]; // printf("write_tqueue %d %p\n", nt.id, ndt.tqe_); TQueue* tqe = ntd.tqe_; TQItem* q; fh << -1 << " TQItems from atomic_dq\n"; while ((q = tqe->atomic_dq(1e20)) != nullptr) { write_tqueue(q, nt, fh); } fh << 0 << "\n"; fh << -1 << " TQItemsfrom binq_\n"; for (q = tqe->binq_->first(); q; q = tqe->binq_->next(q)) { write_tqueue(q, nt, fh); } fh << 0 << "\n"; } // Read a tqueue/checkpoint // int :: should be equal to the previous n_vecplay // n_vecplay: // int: last_index // int: discon_index // int: ubound_index // int: patstim_index // int: should be -1 // n_presyn: // int: flags of presyn_helper // int: should be -1 // null terminated: // int: type // array of size 1: // double: time // ... depends of the type // int: should be -1 // null terminated: // int: TO BE DEFINED // ... depends of the type void CheckPoints::restore_tqueue(NrnThread& nt, const Phase2& p2) { restored = true; for (int i = 0; i < nt.n_vecplay; ++i) { VecPlayContinuous* vpc = (VecPlayContinuous*) nt._vecplay[i]; auto& vec = p2.vec_play_continuous[i]; vpc->last_index_ = vec.last_index; vpc->discon_index_ = vec.discon_index; vpc->ubound_index_ = vec.ubound_index; } // PatternStim patstim_index = p2.patstim_index; // PatternStim if (nt.id == 0) { patstim_te = -1.0; // changed if relevant SelfEvent; } for (int i = 0; i < nt.n_presyn; ++i) { nt.presyns_helper[i].flag_ = p2.preSynConditionEventFlags[i]; } for (const auto& event: p2.events) { restore_tqitem(event.first, event.second, nt); } } } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn_checkpoint.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# =============================================================================. */ #pragma once #include "coreneuron/io/phase2.hpp" namespace coreneuron { struct NrnThread; class FileHandler; class CheckPoints { public: CheckPoints(const std::string& save, const std::string& restore); std::string get_save_path() const { return save_; } std::string get_restore_path() const { return restore_; } bool should_save() const { return !save_.empty(); } bool should_restore() const { return !restore_.empty(); } double restore_time() const; void write_checkpoint(NrnThread* nt, int nb_threads) const; /* return true if special checkpoint initialization carried out and one should not do finitialize */ bool initialize(); void restore_tqueue(NrnThread&, const Phase2& p2); private: const std::string save_; const std::string restore_; bool restored; int patstim_index; double patstim_te; void write_time() const; void write_phase2(NrnThread& nt) const; template void data_write(FileHandler& F, T* data, int cnt, int sz, int layout, int* permute) const; template T* soa2aos(T* data, int cnt, int sz, int layout, int* permute) const; void write_tqueue(TQItem* q, NrnThread& nt, FileHandler& fh) const; void write_tqueue(NrnThread& nt, FileHandler& fh) const; void restore_tqitem(int type, std::shared_ptr event, NrnThread& nt); }; int* inverse_permute(int* p, int n); void nrn_inverse_i_layout(int i, int& icnt, int cnt, int& isz, int sz, int layout); extern int patstimtype; #ifndef CHKPNTDEBUG #define CHKPNTDEBUG 0 #endif #if CHKPNTDEBUG // Factored out from checkpoint changes to nrnoc/multicore.h and nrnoc/nrnoc_ml.h // Put here to avoid potential issues with gpu transfer and to allow // debugging comparison with respect to checkpoint writing to verify that // data is same as on reading when inverse transforming SoA and permutations. 
// Following is a mixture of substantive information which is lost during // nrn_setup.cpp and debugging only information which is retrievable from // NrnThread and Memb_list. Ideally, this should all go away struct Memb_list_chkpnt { // debug only double* data_not_permuted; Datum* pdata_not_permuted; int* nodeindices_not_permuted; }; #endif // CHKPNTDEBUG but another section for it below struct NrnThreadChkpnt { int file_id; #if CHKPNTDEBUG int nmech; double* area; int* parent; Memb_list_chkpnt** mlmap; int n_outputgids; int* output_vindex; double* output_threshold; int* pnttype; int* pntindex; double* delay; // BBCOREPOINTER int nbcp; int* bcptype; int* bcpicnt; int* bcpdcnt; // VecPlay int* vtype; int* mtype; int* vecplay_ix; #endif // CHKPNTDEBUG }; extern NrnThreadChkpnt* nrnthread_chkpnt; } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn_filehandler.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include "coreneuron/io/nrn_filehandler.hpp" #include "coreneuron/nrnconf.h" namespace coreneuron { FileHandler::FileHandler(const std::string& filename) : chkpnt(0) , stored_chkpnt(0) { this->open(filename); } bool FileHandler::file_exist(const std::string& filename) { struct stat buffer; return (stat(filename.c_str(), &buffer) == 0); } void FileHandler::open(const std::string& filename, std::ios::openmode mode) { nrn_assert((mode & (std::ios::in | std::ios::out))); close(); F.open(filename, mode | std::ios::binary); if (!F.is_open()) { std::cerr << "cannot open file '" << filename << "'" << std::endl; } nrn_assert(F.is_open()); current_mode = mode; char version[256]; if (current_mode & std::ios::in) { F.getline(version, sizeof(version)); nrn_assert(!F.fail()); check_bbcore_write_version(version); } if (current_mode & std::ios::out) { F << bbcore_write_version << "\n"; } } bool FileHandler::eof() { if (F.eof()) { return true; } int a = F.get(); if (F.eof()) { return true; } F.putback(a); return false; } int FileHandler::read_int() { char line_buf[max_line_length]; F.getline(line_buf, sizeof(line_buf)); nrn_assert(!F.fail()); int i; int n_scan = sscanf(line_buf, "%d", &i); nrn_assert(n_scan == 1); return i; } void FileHandler::read_mapping_count(int* gid, int* nsec, int* nseg, int* nseclist) { char line_buf[max_line_length]; F.getline(line_buf, sizeof(line_buf)); nrn_assert(!F.fail()); /** mapping file has extra strings, ignore those */ int n_scan = sscanf(line_buf, "%d %d %d %d", gid, nsec, nseg, nseclist); nrn_assert(n_scan == 4); } void FileHandler::read_mapping_cell_count(int* count) { *count = read_int(); } void FileHandler::read_checkpoint_assert() { char line_buf[max_line_length]; F.getline(line_buf, sizeof(line_buf)); nrn_assert(!F.fail()); int i; int n_scan = sscanf(line_buf, "chkpnt %d\n", &i); if (n_scan != 1) { fprintf(stderr, "no chkpnt line for %d\n", chkpnt); } nrn_assert(n_scan == 1); if (i != chkpnt) { fprintf(stderr, "file chkpnt %d != 
expected %d\n", i, chkpnt); } nrn_assert(i == chkpnt); ++chkpnt; } void FileHandler::close() { F.close(); } } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn_filehandler.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include #include #include #include #include "coreneuron/utils/nrn_assert.h" namespace coreneuron { /** Encapsulate low-level reading of coreneuron input data files. * * Error handling is simple: abort()! * * Reader will abort() if native integer size is not 4 bytes. * * All automatic allocations performed by read_int_array() * and read_dbl_array() methods use new []. */ // @todo: remove this static buffer const int max_line_length = 1024; class FileHandler { std::fstream F; //!< File stream associated with reader. std::ios_base::openmode current_mode; //!< File open mode (not stored in fstream) int chkpnt; //!< Current checkpoint number state. int stored_chkpnt; //!< last "remembered" checkpoint number state. /** Read a checkpoint line, bump our chkpnt counter, and assert equality. * * Checkpoint information is represented by a sequence "checkpt %d\n" * where %d is a scanf-compatible representation of the checkpoint * integer. */ void read_checkpoint_assert(); // FileHandler is not copyable. FileHandler(const FileHandler&) = delete; FileHandler& operator=(const FileHandler&) = delete; public: FileHandler() : chkpnt(0) , stored_chkpnt(0) {} explicit FileHandler(const std::string& filename); /** Preserving chkpnt state, move to a new file. 
*/ void open(const std::string& filename, std::ios::openmode mode = std::ios::in); /** Is the file not open */ bool fail() const { return F.fail(); } static bool file_exist(const std::string& filename); /** nothing more to read */ bool eof(); /** Query chkpnt state. */ int checkpoint() const { return chkpnt; } /** Explicitly override chkpnt state. */ void checkpoint(int c) { chkpnt = c; } /** Record current chkpnt state. */ void record_checkpoint() { stored_chkpnt = chkpnt; } /** Restored last recorded chkpnt state. */ void restore_checkpoint() { chkpnt = stored_chkpnt; } /** Parse a single integer entry. * * Single integer entries are represented by their standard * (C locale) text representation, followed by a newline. * Extraneous characters following the integer but preceding * the newline are ignore. */ int read_int(); /** Parse a neuron mapping count entries * * Reads neuron mapping info which is represented by * gid, #sections, #segments, #section lists */ void read_mapping_count(int* gid, int* nsec, int* nseg, int* nseclist); /** Reads number of cells in parsing file */ void read_mapping_cell_count(int* count); /** Parse a neuron section segment mapping * * Read count no of mappings for section to segment */ template int read_mapping_info(T* mapinfo) { int nsec, nseg, n_scan; char line_buf[max_line_length], name[max_line_length]; F.getline(line_buf, sizeof(line_buf)); n_scan = sscanf(line_buf, "%s %d %d", name, &nsec, &nseg); nrn_assert(n_scan == 3); mapinfo->name = std::string(name); if (nseg) { std::vector sec, seg; sec.reserve(nseg); seg.reserve(nseg); read_array(&sec[0], nseg); read_array(&seg[0], nseg); for (int i = 0; i < nseg; i++) { mapinfo->add_segment(sec[i], seg[i]); } } return nseg; } /** Defined flag values for parse_array() */ enum parse_action { read, seek }; /** Generic parse function for an array of fixed length. * * \tparam T the array element type: may be \c int or \c double. * \param p pointer to the target in memory for reading into. 
* \param count number of items of type \a T to parse. * \param action whether to validate and skip (\c seek) or * copy array into memory (\c read). * \return the supplied pointer value. * * Error if \a count is non-zero, \a flag is \c read, and * the supplied pointer \p is null. * * Arrays are represented by a checkpoint line followed by * the array items in increasing index order, in the native binary * representation of the writing process. */ template inline T* parse_array(T* p, size_t count, parse_action flag) { if (count > 0 && flag != seek) nrn_assert(p != 0); read_checkpoint_assert(); switch (flag) { case seek: F.seekg(count * sizeof(T), std::ios_base::cur); break; case read: F.read((char*) p, count * sizeof(T)); break; } nrn_assert(!F.fail()); return p; } // convenience interfaces: /** Read an integer array of fixed length. */ template inline T* read_array(T* p, size_t count) { return parse_array(p, count, read); } /** Allocate and read an integer array of fixed length. */ template inline T* read_array(size_t count) { return parse_array(new T[count], count, read); } template inline std::vector read_vector(size_t count) { std::vector vec(count); parse_array(vec.data(), count, read); return vec; } /** Close currently open file. */ void close(); /** Write an 1D array **/ template void write_array(T* p, size_t nb_elements) { nrn_assert(F.is_open()); nrn_assert(current_mode & std::ios::out); write_checkpoint(); F.write((const char*) p, nb_elements * (sizeof(T))); nrn_assert(!F.fail()); } /** Write a padded array. 
nb_elements is number of elements to write per line, * line_width is full size of a line in nb elements**/ template void write_array(T* p, size_t nb_elements, size_t line_width, size_t nb_lines, bool to_transpose = false) { nrn_assert(F.is_open()); nrn_assert(current_mode & std::ios::out); write_checkpoint(); T* temp_cpy = new T[nb_elements * nb_lines]; if (to_transpose) { for (size_t i = 0; i < nb_lines; i++) { for (size_t j = 0; j < nb_elements; j++) { temp_cpy[i + j * nb_lines] = p[i * line_width + j]; } } } else { memcpy(temp_cpy, p, nb_elements * sizeof(T) * nb_lines); } // AoS never use padding, SoA is translated above, so one write // operation is enought in both cases F.write((const char*) temp_cpy, nb_elements * sizeof(T) * nb_lines); nrn_assert(!F.fail()); delete[] temp_cpy; } template FileHandler& operator<<(const T& scalar) { nrn_assert(F.is_open()); nrn_assert(current_mode & std::ios::out); F << scalar; nrn_assert(!F.fail()); return *this; } private: /* write_checkpoint is callable only for our internal uses, making it accesible to user, makes * file format unpredictable */ void write_checkpoint() { F << "chkpnt " << chkpnt++ << "\n"; } }; } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn_setup.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include #include #include #include #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/nrnconf.h" #include "coreneuron/utils/randoms/nrnran123.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/network/multisend.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/utils/nrnmutdec.hpp" #include "coreneuron/utils/memory.h" #include "coreneuron/utils/utils.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/io/nrn_setup.hpp" #include "coreneuron/network/partrans.hpp" #include "coreneuron/io/nrn_checkpoint.hpp" #include "coreneuron/permute/node_permute.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/io/phase1.hpp" #include "coreneuron/io/phase2.hpp" #include "coreneuron/io/mech_report.h" #include "coreneuron/io/reports/nrnreport.hpp" // callbacks into nrn/src/nrniv/nrnbbcore_write.cpp #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/coreneuron.hpp" /// --> Coreneuron bool corenrn_embedded; int corenrn_embedded_nthread; void (*nrn2core_group_ids_)(int*); extern "C" { SetupTransferInfo* (*nrn2core_get_partrans_setup_info_)(int ngroup, int cn_nthread, size_t cn_sidt_size); } void (*nrn2core_get_trajectory_requests_)(int tid, int& bsize, int& n_pr, void**& vpr, int& n_trajec, int*& types, int*& indices, double**& pvars, double**& varrays); void (*nrn2core_trajectory_values_)(int tid, int n_pr, void** vpr, double t); void (*nrn2core_trajectory_return_)(int tid, int n_pr, int bsize, int vecsz, void** vpr, double t); int (*nrn2core_all_spike_vectors_return_)(std::vector& spikevec, std::vector& gidvec); void (*nrn2core_all_weights_return_)(std::vector& weights); // file format defined in cooperation with nrncore/src/nrniv/nrnbbcore_write.cpp // single integers are ascii 
one per line. arrays are binary int or double // Note that regardless of the gid contents of a group, since all gids are // globally unique, a filename convention which involves the first gid // from the group is adequate. Also note that balance is carried out from a // per group perspective and launching a process consists of specifying // a list of group ids (first gid of the group) for each process. // // _1.dat // n_presyn, n_netcon // output_gids (npresyn) with -(type+1000*index) for those acell with no gid // netcon_srcgid (nnetcon) -(type+1000*index) refers to acell with no gid // -1 means the netcon has no source (not implemented) // Note that the negative gids are only thread unique and not process unique. // We create a thread specific hash table for the negative gids for each thread // when _1.dat is read and then destroy it after _2.dat // is finished using it. An earlier implementation which attempted to // encode the thread number into the negative gid // (i.e -ith - nth*(type +1000*index)) failed due to not large enough // integer domain size. // Note that for file transfer it is an error if a negative srcgid is // not in the same thread as the target. This is because there it may // not be the case that threads in a NEURON process end up on same process // in CoreNEURON. NEURON will raise an error if this // is the case. However, for direct memory transfer, it is allowed that // a negative srcgid may be in a different thread than the target. So // nrn2core_get_dat1 has a last arg netcon_negsrcgid_tid that specifies // for the negative gids in netcon_srcgid (in that order) the source thread. // // _2.dat // n_real_cell, n_output, n_real_output, nnode // ndiam - 0 if no mechanism has dparam with diam semantics, or nnode // nmech - includes artcell mechanisms // for the nmech tml mechanisms // type, nodecount // nidata, nvdata, nweight // v_parent_index (nnode) // actual_a, b, area, v (nnode) // diam - if ndiam > 0. 
Note that only valid diam is for those nodes with diam semantics mechanisms // for the nmech tml mechanisms // nodeindices (nodecount) but only if not an artificial cell // data (nodecount*param_size) // pdata (nodecount*dparam_size) but only if dparam_size > 0 on this side. // output_vindex (n_presyn) >= 0 associated with voltages -(type+1000*index) for acell // output_threshold (n_real_output) // netcon_pnttype (nnetcon) <=0 if a NetCon does not have a target. // netcon_pntindex (nnetcon) // weights (nweight) // delays (nnetcon) // for the nmech tml mechanisms that have a nrn_bbcore_write method // type // icnt // dcnt // int array (number specified by the nodecount nrn_bbcore_write // to be interpreted by this side's nrn_bbcore_read method) // double array // #VectorPlay_instances, for each of these instances // 4 (VecPlayContinuousType) // mtype // index (from Memb_list.data) // vecsize // yvec // tvec // // The critical issue requiring careful attention is that a coreneuron // process reads many coreneuron thread files with a result that, although // the conceptual // total n_pre is the sum of all the n_presyn from each thread as is the // total number of output_gid, the number of InputPreSyn instances must // be computed here from a knowledge of all threads' netcon_srcgid after // all threads' output_gids have been registered. We want to save the // "individual allocation of many small objects" memory overhead by // allocating a single InputPreSyn array for the entire process. 
// For this reason cellgroup data are divided into two separate // files with the first containing output_gids and netcon_srcgid which are // stored in the nt.presyns array and nt.netcons array respectively namespace coreneuron { static OMP_Mutex mut; /// Vector of maps for negative presyns std::vector> neg_gid2out; /// Maps for ouput and input presyns std::map gid2out; std::map gid2in; /// InputPreSyn.nc_index_ to + InputPreSyn.nc_cnt_ give the NetCon* std::vector netcon_in_presyn_order_; /// Only for setup vector of netcon source gids std::vector nrnthreads_netcon_srcgid; /// If a nrnthreads_netcon_srcgid is negative, need to determine the thread when /// in order to use the correct neg_gid2out[tid] map std::vector> nrnthreads_netcon_negsrcgid_tid; /* read files.dat file and distribute cellgroups to all mpi ranks */ void nrn_read_filesdat(int& ngrp, int*& grp, const char* filesdat) { patstimtype = nrn_get_mechtype("PatternStim"); if (corenrn_embedded) { ngrp = corenrn_embedded_nthread; grp = new int[ngrp + 1]; (*nrn2core_group_ids_)(grp); return; } FILE* fp = fopen(filesdat, "r"); if (!fp) { nrn_fatal_error("No input file ( %s ) with nrnthreads, exiting...", filesdat); } char version[256]; nrn_assert(fscanf(fp, "%s\n", version) == 1); check_bbcore_write_version(version); int iNumFiles = 0; nrn_assert(fscanf(fp, "%d\n", &iNumFiles) == 1); // temporary strategem to figure out if model uses gap junctions while // being backward compatible if (iNumFiles == -1) { nrn_assert(fscanf(fp, "%d\n", &iNumFiles) == 1); nrn_have_gaps = true; if (nrnmpi_myid == 0) { printf("Model uses gap junctions\n"); } } if (nrnmpi_numprocs > iNumFiles && nrnmpi_myid == 0) { printf( "Info : The number of input datasets are less than ranks, some ranks will be idle!\n"); } ngrp = 0; grp = new int[iNumFiles / nrnmpi_numprocs + 1]; // irerate over gids in files.dat for (int iNum = 0; iNum < iNumFiles; ++iNum) { int iFile; nrn_assert(fscanf(fp, "%d\n", &iFile) == 1); if ((iNum % nrnmpi_numprocs) 
== nrnmpi_myid) { grp[ngrp] = iFile; ngrp++; } } fclose(fp); } void netpar_tid_gid2ps(int tid, int gid, PreSyn** ps, InputPreSyn** psi) { /// for gid < 0 returns the PreSyn* in the thread (tid) specific map. *ps = nullptr; *psi = nullptr; if (gid >= 0) { auto gid2out_it = gid2out.find(gid); if (gid2out_it != gid2out.end()) { *ps = gid2out_it->second; } else { auto gid2in_it = gid2in.find(gid); if (gid2in_it != gid2in.end()) { *psi = gid2in_it->second; } } } else { auto gid2out_it = neg_gid2out[tid].find(gid); if (gid2out_it != neg_gid2out[tid].end()) { *ps = gid2out_it->second; } } } void determine_inputpresyn() { // allocate the process wide InputPreSyn array // all the output_gid have been registered and associated with PreSyn. // now count the needed InputPreSyn by filling the netpar::gid2in map gid2in.clear(); // now have to fill the new table // do not need to worry about negative gid overlap since only use // it to search for PreSyn in this thread. std::vector inputpresyn_; for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; // associate gid with InputPreSyn and increase PreSyn and InputPreSyn count nt.n_input_presyn = 0; // if single thread or file transfer then definitely empty. 
std::vector& negsrcgid_tid = nrnthreads_netcon_negsrcgid_tid[ith]; size_t i_tid = 0; for (int i = 0; i < nt.n_netcon; ++i) { int gid = nrnthreads_netcon_srcgid[ith][i]; if (gid >= 0) { /// If PreSyn or InputPreSyn is already in the map auto gid2out_it = gid2out.find(gid); if (gid2out_it != gid2out.end()) { /// Increase PreSyn count ++gid2out_it->second->nc_cnt_; continue; } auto gid2in_it = gid2in.find(gid); if (gid2in_it != gid2in.end()) { /// Increase InputPreSyn count ++gid2in_it->second->nc_cnt_; continue; } /// Create InputPreSyn and increase its count InputPreSyn* psi = new InputPreSyn; ++psi->nc_cnt_; gid2in[gid] = psi; inputpresyn_.push_back(psi); ++nt.n_input_presyn; } else { int tid = nt.id; if (!negsrcgid_tid.empty()) { tid = negsrcgid_tid[i_tid++]; } auto gid2out_it = neg_gid2out[tid].find(gid); if (gid2out_it != neg_gid2out[tid].end()) { /// Increase negative PreSyn count ++gid2out_it->second->nc_cnt_; } } } } // now, we can opportunistically create the NetCon* pointer array // to save some memory overhead for // "large number of small array allocation" by // counting the number of NetCons each PreSyn and InputPreSyn point to. // Conceivably the nt.netcons could become a process global array // in which case the NetCon* pointer array could become an integer index // array. More speculatively, the index array could be eliminated itself // if the process global NetCon array were ordered properly but that // would interleave NetCon from different threads. Not a problem for // serial threads but the reordering would propagate to nt.pntprocs // if the NetCon data pointers are also replaced by integer indices. // First, allocate the pointer array. 
int n_nc = 0; for (int ith = 0; ith < nrn_nthread; ++ith) { n_nc += nrn_threads[ith].n_netcon; } netcon_in_presyn_order_.resize(n_nc); n_nc = 0; // fill the indices with the offset values and reset the nc_cnt_ // such that we use the nc_cnt_ in the following loop to assign the NetCon // to the right place // for PreSyn int offset = 0; for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; for (int i = 0; i < nt.n_presyn; ++i) { PreSyn& ps = nt.presyns[i]; ps.nc_index_ = offset; offset += ps.nc_cnt_; ps.nc_cnt_ = 0; } } // for InputPreSyn for (auto psi: inputpresyn_) { psi->nc_index_ = offset; offset += psi->nc_cnt_; psi->nc_cnt_ = 0; } inputpresyn_.clear(); // with gid to InputPreSyn and PreSyn maps we can setup the multisend // target lists. if (use_multisend_) { #if NRN_MULTISEND nrn_multisend_setup(); #endif } // fill the netcon_in_presyn_order and recompute nc_cnt_ // note that not all netcon_in_presyn will be filled if there are netcon // with no presyn (ie. nrnthreads_netcon_srcgid[nt.id][i] = -1) but that is ok since they are // only used via ps.nc_index_ and ps.nc_cnt_; for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; // if single thread or file transfer then definitely empty. 
std::vector& negsrcgid_tid = nrnthreads_netcon_negsrcgid_tid[ith]; size_t i_tid = 0; for (int i = 0; i < nt.n_netcon; ++i) { NetCon* nc = nt.netcons + i; int gid = nrnthreads_netcon_srcgid[ith][i]; int tid = ith; if (!negsrcgid_tid.empty() && gid < -1) { tid = negsrcgid_tid[i_tid++]; } PreSyn* ps; InputPreSyn* psi; netpar_tid_gid2ps(tid, gid, &ps, &psi); if (ps) { netcon_in_presyn_order_[ps->nc_index_ + ps->nc_cnt_] = nc; ++ps->nc_cnt_; ++n_nc; } else if (psi) { netcon_in_presyn_order_[psi->nc_index_ + psi->nc_cnt_] = nc; ++psi->nc_cnt_; ++n_nc; } } } /// Resize the vector to its actual size of the netcons put in it netcon_in_presyn_order_.resize(n_nc); } /// Clean up void nrn_setup_cleanup() { for (int ith = 0; ith < nrn_nthread; ++ith) { if (nrnthreads_netcon_srcgid[ith]) delete[] nrnthreads_netcon_srcgid[ith]; } nrnthreads_netcon_srcgid.clear(); nrnthreads_netcon_negsrcgid_tid.clear(); neg_gid2out.clear(); } void nrn_setup(const char* filesdat, bool is_mapping_needed, CheckPoints& checkPoints, bool run_setup_cleanup, const char* datpath, const char* restore_path, double* mindelay) { double time = nrn_wtime(); int ngroup; int* gidgroups; nrn_read_filesdat(ngroup, gidgroups, filesdat); UserParams userParams(ngroup, gidgroups, datpath, strlen(restore_path) == 0 ? datpath : restore_path, checkPoints); // temporary bug work around. If any process has multiple threads, no // process can have a single thread. So, for now, if one thread, make two. // Fortunately, empty threads work fine. // Allocate NrnThread* nrn_threads of size ngroup (minimum 2) // Note that rank with 0 dataset/cellgroup works fine nrn_threads_create(userParams.ngroup <= 1 ? 2 : userParams.ngroup); // from nrn_has_net_event create pnttype2presyn for use in phase2. 
auto& memb_func = corenrn.get_memb_funcs(); auto& pnttype2presyn = corenrn.get_pnttype2presyn(); auto& nrn_has_net_event_ = corenrn.get_has_net_event(); pnttype2presyn.clear(); pnttype2presyn.resize(memb_func.size(), -1); for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) { pnttype2presyn[nrn_has_net_event_[i]] = i; } nrnthread_chkpnt = new NrnThreadChkpnt[nrn_nthread]; if (nrn_nthread > 1) { // NetCvode construction assumed one thread. Need nrn_nthread instances // of NetCvodeThreadData. Here since possible checkpoint restore of // tqueue at end of phase2. nrn_p_construct(); } if (use_solve_interleave) { create_interleave_info(); } /// Reserve vector of maps of size ngroup for negative gid-s /// std::vector< std::map > neg_gid2out; neg_gid2out.resize(userParams.ngroup); // bug fix. gid2out is cumulative over all threads and so do not // know how many there are til after phase1 // A process's complete set of output gids and allocation of each thread's // nt.presyns and nt.netcons arrays. // Generates the gid2out map which is needed // to later count the required number of InputPreSyn /// gid2out - map of output presyn-s /// std::map gid2out; gid2out.clear(); nrnthreads_netcon_srcgid.resize(nrn_nthread); for (int i = 0; i < nrn_nthread; ++i) nrnthreads_netcon_srcgid[i] = nullptr; // Gap junctions used to be done first in the sense of reading files // and calling gap_mpi_setup. But during phase2, gap_thread_setup and // gap_indices_permute were called after NrnThread.data was in its final // layout and mechanism permutation was determined. This is no longer // ideal as it necessitates keeping setup_info_ in existence to the end // of phase2. So gap junction setup is deferred to after phase2. 
nrnthreads_netcon_negsrcgid_tid.resize(nrn_nthread); if (!corenrn_embedded) { coreneuron::phase_wrapper(userParams); } else { nrn_multithread_job([](NrnThread* n) { Phase1 p1{n->id}; NrnThread& nt = *n; p1.populate(nt, mut); }); } // from the gid2out map and the nrnthreads_netcon_srcgid array, // fill the gid2in, and from the number of entries, // allocate the process wide InputPreSyn array determine_inputpresyn(); // read the rest of the gidgroup's data and complete the setup for each // thread. /* nrn_multithread_job supports serial, pthread, and openmp. */ coreneuron::phase_wrapper(userParams, corenrn_embedded); // gap junctions // Gaps are done after phase2, in order to use layout and permutation // information via calls to stdindex2ptr. if (nrn_have_gaps) { nrn_partrans::transfer_thread_data_ = new nrn_partrans::TransferThreadData[nrn_nthread]; if (!corenrn_embedded) { nrn_partrans::setup_info_ = new SetupTransferInfo[nrn_nthread]; coreneuron::phase_wrapper(userParams); } else { nrn_partrans::setup_info_ = (*nrn2core_get_partrans_setup_info_)(userParams.ngroup, nrn_nthread, sizeof(sgid_t)); } nrn_multithread_job(nrn_partrans::gap_data_indices_setup); nrn_partrans::gap_mpi_setup(userParams.ngroup); // Whether allocated in NEURON or here, delete here. delete[] nrn_partrans::setup_info_; nrn_partrans::setup_info_ = nullptr; } if (is_mapping_needed) coreneuron::phase_wrapper(userParams); *mindelay = set_mindelay(*mindelay); if (run_setup_cleanup) // if run_setup_cleanup==false, user must call nrn_setup_cleanup() later nrn_setup_cleanup(); #if INTERLEAVE_DEBUG // mk_cell_indices debug code is supposed to be used with cell-per-core permutations if (corenrn_param.cell_interleave_permute == 1) { mk_cell_indices(); } #endif /// Allocate memory for fast_imem calculation nrn_fast_imem_alloc(); /// Generally, tables depend on a few parameters. And if those parameters change, /// then the table needs to be recomputed. 
This is obviously important in NEURON /// since the user can change those parameters at any time. However, there is no /// c example for CoreNEURON so can't see what it looks like in that context. /// Boils down to setting up a function pointer of the function _check_table_thread(), /// which is only executed by StochKV.c. nrn_mk_table_check(); // was done in nrn_thread_memblist_setup in multicore.c size_t model_size_bytes; if (corenrn_param.model_stats) { write_mech_report(); model_size_bytes = model_size(true); } else { model_size_bytes = model_size(false); } if (nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { printf(" Setup Done : %.2lf seconds \n", nrn_wtime() - time); if (model_size_bytes < 1024) { printf(" Model size : %ld bytes\n", model_size_bytes); } else if (model_size_bytes < 1024 * 1024) { printf(" Model size : %.2lf kB\n", model_size_bytes / 1024.); } else if (model_size_bytes < 1024 * 1024 * 1024) { printf(" Model size : %.2lf MB\n", model_size_bytes / (1024. * 1024.)); } else { printf(" Model size : %.2lf GB\n", model_size_bytes / (1024. * 1024. 
* 1024.)); } } delete[] userParams.gidgroups; } void setup_ThreadData(NrnThread& nt) { for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) { Memb_func& mf = corenrn.get_memb_func(tml->index); Memb_list* ml = tml->ml; if (mf.thread_size_) { ml->_thread = (ThreadDatum*) ecalloc_align(mf.thread_size_, sizeof(ThreadDatum)); if (mf.thread_mem_init_) { { const std::lock_guard lock(mut); (*mf.thread_mem_init_)(ml->_thread); } } } else { ml->_thread = nullptr; } } } void read_phasegap(NrnThread& nt, UserParams& userParams) { auto& F = userParams.file_reader[nt.id]; if (F.fail()) { return; } F.checkpoint(0); int sidt_size = F.read_int(); assert(sidt_size == int(sizeof(sgid_t))); std::size_t ntar = F.read_int(); std::size_t nsrc = F.read_int(); auto& si = nrn_partrans::setup_info_[nt.id]; si.src_sid.resize(nsrc); si.src_type.resize(nsrc); si.src_index.resize(nsrc); if (nsrc) { F.read_array(si.src_sid.data(), nsrc); F.read_array(si.src_type.data(), nsrc); F.read_array(si.src_index.data(), nsrc); } si.tar_sid.resize(ntar); si.tar_type.resize(ntar); si.tar_index.resize(ntar); if (ntar) { F.read_array(si.tar_sid.data(), ntar); F.read_array(si.tar_type.data(), ntar); F.read_array(si.tar_index.data(), ntar); } #if CORENRN_DEBUG printf("%d read_phasegap tid=%d nsrc=%d ntar=%d\n", nrnmpi_myid, nt.id, nsrc, ntar); for (int i = 0; i < nsrc; ++i) { printf("src %z %d %d\n", size_t(si.src_sid[i]), si.src_type[i], si.src_index[i]); } for (int i = 0; i < ntar; ++i) { printf("tar %z %d %d\n", size_t(si.src_sid[i]), si.src_type[i], si.src_index[i]); } #endif } // This function is related to nrn_dblpntr2nrncore in Neuron to determine which values should // be transferred from CoreNeuron. Types correspond to the value to be transferred based on // mech_type enum or non-artificial cell mechanisms. // take into account alignment, layout, permutation // only voltage, i_membrane_ or mechanism data index allowed. 
(mtype 0 means time) double* stdindex2ptr(int mtype, int index, NrnThread& nt) { if (mtype == voltage) { // voltage int ix{index}; // relative to _actual_v nrn_assert((ix >= 0) && (ix < nt.end)); if (nt._permute) { node_permute(&ix, 1, nt._permute); } return nt._actual_v + ix; } else if (mtype == i_membrane_) { // membrane current from fast_imem calculation int ix{index}; // relative to nrn_fast_imem->nrn_sav_rhs nrn_assert((ix >= 0) && (ix < nt.end)); if (nt._permute) { node_permute(&ix, 1, nt._permute); } return nt.nrn_fast_imem->nrn_sav_rhs + ix; } else if (mtype > 0 && mtype < static_cast(corenrn.get_memb_funcs().size())) { // Memb_list* ml = nt._ml_list[mtype]; nrn_assert(ml); int ix = nrn_param_layout(index, mtype, ml); if (ml->_permute) { ix = nrn_index_permute(ix, mtype, ml); } return ml->data + ix; } else if (mtype == 0) { // time return &nt._t; } else { printf("stdindex2ptr does not handle mtype=%d\n", mtype); nrn_assert(0); } return nullptr; } // from i to (icnt, isz) void nrn_inverse_i_layout(int i, int& icnt, int cnt, int& isz, int sz, int layout) { if (layout == Layout::AoS) { icnt = i / sz; isz = i % sz; } else if (layout == Layout::SoA) { int padded_cnt = nrn_soa_padded_size(cnt, layout); icnt = i % padded_cnt; isz = i / padded_cnt; } else { assert(0); } } /** * Cleanup global ion map created during mechanism registration * * In case of coreneuron standalone execution nrn_ion_global_map * can be deleted at the end of execution. But in case embedded * run via neuron, mechanisms are registered only once i.e. during * first call to coreneuron. This is why we call cleanup only in * case of standalone coreneuron execution via nrniv-core or * special-core. * * @todo coreneuron should have finalise callback which can be * called from NEURON for final memory cleanup including global * state like registered mechanisms and ions map. 
*/ void nrn_cleanup_ion_map() { for (int i = 0; i < nrn_ion_global_map_size; i++) { free_memory(nrn_ion_global_map[i]); } free_memory(nrn_ion_global_map); nrn_ion_global_map = nullptr; nrn_ion_global_map_size = 0; } void delete_fornetcon_info(NrnThread& nt) { delete[] std::exchange(nt._fornetcon_perm_indices, nullptr); delete[] std::exchange(nt._fornetcon_weight_perm, nullptr); } /* nrn_threads_free() presumes all NrnThread and NrnThreadMembList data is * allocated with malloc(). This is not the case here, so let's try and fix * things up first. */ void nrn_cleanup() { clear_event_queue(); // delete left-over TQItem for (auto psi: gid2in) { delete psi.second; } gid2in.clear(); gid2out.clear(); // clean nrnthread_chkpnt if (nrnthread_chkpnt) { delete[] nrnthread_chkpnt; nrnthread_chkpnt = nullptr; } // clean NrnThreads for (int it = 0; it < nrn_nthread; ++it) { NrnThread* nt = nrn_threads + it; NrnThreadMembList* next_tml = nullptr; delete_fornetcon_info(*nt); delete_trajectory_requests(*nt); for (NrnThreadMembList* tml = nt->tml; tml; tml = next_tml) { Memb_list* ml = tml->ml; mod_f_t s = corenrn.get_memb_func(tml->index).destructor; if (s) { (*s)(nt, ml, tml->index); } ml->data = nullptr; // this was pointing into memory owned by nt free_memory(ml->pdata); ml->pdata = nullptr; free_memory(ml->nodeindices); ml->nodeindices = nullptr; if (ml->_permute) { delete[] ml->_permute; ml->_permute = nullptr; } if (ml->_thread) { free_memory(ml->_thread); ml->_thread = nullptr; } // Destroy the global variables struct allocated in nrn_init if (auto* const priv_dtor = corenrn.get_memb_func(tml->index).private_destructor) { (*priv_dtor)(nt, ml, tml->index); assert(!ml->instance); assert(!ml->global_variables); assert(ml->global_variables_size == 0); } NetReceiveBuffer_t* nrb = ml->_net_receive_buffer; if (nrb) { if (nrb->_size) { free_memory(nrb->_pnt_index); free_memory(nrb->_weight_index); free_memory(nrb->_nrb_t); free_memory(nrb->_nrb_flag); free_memory(nrb->_displ); 
free_memory(nrb->_nrb_index); } free_memory(nrb); ml->_net_receive_buffer = nullptr; } NetSendBuffer_t* nsb = ml->_net_send_buffer; if (nsb) { delete nsb; ml->_net_send_buffer = nullptr; } if (tml->dependencies) free(tml->dependencies); next_tml = tml->next; free_memory(tml->ml); free_memory(tml); } nt->_actual_rhs = nullptr; nt->_actual_d = nullptr; nt->_actual_a = nullptr; nt->_actual_b = nullptr; free_memory(nt->_v_parent_index); nt->_v_parent_index = nullptr; free_memory(nt->_data); nt->_data = nullptr; free(nt->_idata); nt->_idata = nullptr; free_memory(nt->_vdata); nt->_vdata = nullptr; if (nt->_permute) { delete[] nt->_permute; nt->_permute = nullptr; } if (nt->presyns_helper) { free_memory(nt->presyns_helper); nt->presyns_helper = nullptr; } if (nt->pntprocs) { free_memory(nt->pntprocs); nt->pntprocs = nullptr; } if (nt->presyns) { delete[] nt->presyns; nt->presyns = nullptr; } if (nt->pnt2presyn_ix) { for (size_t i = 0; i < corenrn.get_has_net_event().size(); ++i) { if (nt->pnt2presyn_ix[i]) { free(nt->pnt2presyn_ix[i]); } } free_memory(nt->pnt2presyn_ix); } if (nt->netcons) { delete[] nt->netcons; nt->netcons = nullptr; } if (nt->weights) { free_memory(nt->weights); nt->weights = nullptr; } if (nt->_shadow_rhs) { free_memory(nt->_shadow_rhs); nt->_shadow_rhs = nullptr; } if (nt->_shadow_d) { free_memory(nt->_shadow_d); nt->_shadow_d = nullptr; } if (nt->_net_send_buffer_size) { free_memory(nt->_net_send_buffer); nt->_net_send_buffer = nullptr; nt->_net_send_buffer_size = 0; } if (nt->_watch_types) { free(nt->_watch_types); nt->_watch_types = nullptr; } // mapping information is available only for non-empty NrnThread if (nt->mapping && nt->ncell) { delete ((NrnThreadMappingInfo*) nt->mapping); } free_memory(nt->_ml_list); if (nt->nrn_fast_imem) { fast_imem_free(); } } #if NRN_MULTISEND nrn_multisend_cleanup(); #endif netcon_in_presyn_order_.clear(); nrn_threads_free(); if (!corenrn.get_pnttype2presyn().empty()) { corenrn.get_pnttype2presyn().clear(); } 
destroy_interleave_info(); nrn_partrans::gap_cleanup(); } void delete_trajectory_requests(NrnThread& nt) { if (nt.trajec_requests) { TrajectoryRequests* tr = nt.trajec_requests; if (tr->n_trajec) { delete[] tr->vpr; if (tr->scatter) { delete[] tr->scatter; } if (tr->varrays) { delete[] tr->varrays; } delete[] tr->gather; } delete nt.trajec_requests; nt.trajec_requests = nullptr; } } void read_phase1(NrnThread& nt, UserParams& userParams) { Phase1 p1{userParams.file_reader[nt.id]}; // Protect gid2in, gid2out and neg_gid2out p1.populate(nt, mut); } void read_phase2(NrnThread& nt, UserParams& userParams) { Phase2 p2; if (corenrn_embedded) { p2.read_direct(nt.id, nt); } else { p2.read_file(userParams.file_reader[nt.id], nt); } p2.populate(nt, userParams); } /** read mapping information for neurons */ void read_phase3(NrnThread& nt, UserParams& userParams) { /** restore checkpoint state (before restoring queue items */ auto& F = userParams.file_reader[nt.id]; F.restore_checkpoint(); /** mapping information for all neurons in single NrnThread */ NrnThreadMappingInfo* ntmapping = new NrnThreadMappingInfo(); int count = 0; F.read_mapping_cell_count(&count); /** number of cells in mapping file should equal to cells in NrnThread */ nrn_assert(count == nt.ncell); /** for every neuron */ for (int i = 0; i < nt.ncell; i++) { int gid, nsec, nseg, nseclist; // read counts F.read_mapping_count(&gid, &nsec, &nseg, &nseclist); CellMapping* cmap = new CellMapping(gid); // read section-segment mapping for every section list for (int j = 0; j < nseclist; j++) { SecMapping* smap = new SecMapping(); F.read_mapping_info(smap); cmap->add_sec_map(smap); } ntmapping->add_cell_mapping(cmap); } // make number #cells match with mapping size nrn_assert((int) ntmapping->size() == nt.ncell); // set pointer in NrnThread nt.mapping = (void*) ntmapping; nt.summation_report_handler_ = std::make_unique(); } /* Returns the size of the dynamically allocated memory for NrnThreadMembList * Includes: * - 
Size of NrnThreadMembList * - Size of Memb_list * - Size of nodeindices * - Size of _permute * - Size of _thread * - Size of NetReceive and NetSend Buffers * - Size of int variables * - Size of double variables (If include_data is enabled. Those variables are already counted * since they point to nt->_data.) */ size_t memb_list_size(NrnThreadMembList* tml, bool include_data) { size_t nbyte = sizeof(NrnThreadMembList) + sizeof(Memb_list); nbyte += tml->ml->nodecount * sizeof(int); if (tml->ml->_permute) { nbyte += tml->ml->nodecount * sizeof(int); } if (tml->ml->_thread) { Memb_func& mf = corenrn.get_memb_func(tml->index); nbyte += mf.thread_size_ * sizeof(ThreadDatum); } if (tml->ml->_net_receive_buffer) { nbyte += sizeof(NetReceiveBuffer_t) + tml->ml->_net_receive_buffer->size_of_object(); } if (tml->ml->_net_send_buffer) { nbyte += sizeof(NetSendBuffer_t) + tml->ml->_net_send_buffer->size_of_object(); } if (include_data) { nbyte += corenrn.get_prop_param_size()[tml->index] * tml->ml->nodecount * sizeof(double); } nbyte += corenrn.get_prop_dparam_size()[tml->index] * tml->ml->nodecount * sizeof(Datum); #ifdef DEBUG int i = tml->index; printf("%s %d psize=%d ppsize=%d cnt=%d nbyte=%ld\n", corenrn.get_memb_func(i).sym, i, corenrn.get_prop_param_size()[i], corenrn.get_prop_dparam_size()[i], tml->ml->nodecount, nbyte); #endif return nbyte; } /// Approximate count of number of bytes for the gid2out map size_t output_presyn_size(void) { if (gid2out.empty()) { return 0; } size_t nbyte = sizeof(gid2out) + sizeof(int) * gid2out.size() + sizeof(PreSyn*) * gid2out.size(); #ifdef DEBUG printf(" gid2out table bytes=~%ld size=%ld\n", nbyte, gid2out.size()); #endif return nbyte; } size_t input_presyn_size(void) { if (gid2in.empty()) { return 0; } size_t nbyte = sizeof(gid2in) + sizeof(int) * gid2in.size() + sizeof(InputPreSyn*) * gid2in.size(); #ifdef DEBUG printf(" gid2in table bytes=~%ld size=%ld\n", nbyte, gid2in.size()); #endif return nbyte; } size_t model_size(bool 
detailed_report) { long nbyte = 0; size_t sz_nrnThread = sizeof(NrnThread); size_t sz_presyn = sizeof(PreSyn); size_t sz_input_presyn = sizeof(InputPreSyn); size_t sz_netcon = sizeof(NetCon); size_t sz_pntproc = sizeof(Point_process); size_t nccnt = 0; std::vector size_data(13, 0); std::vector global_size_data_min(13, 0); std::vector global_size_data_max(13, 0); std::vector global_size_data_sum(13, 0); std::vector global_size_data_avg(13, 0.0); for (int i = 0; i < nrn_nthread; ++i) { NrnThread& nt = nrn_threads[i]; size_t nb_nt = 0; // per thread nccnt += nt.n_netcon; // Memb_list size int nmech = 0; for (auto tml = nt.tml; tml; tml = tml->next) { nb_nt += memb_list_size(tml, false); ++nmech; } // basic thread size includes mechanism data and G*V=I matrix nb_nt += sz_nrnThread; nb_nt += nt._ndata * sizeof(double) + nt._nidata * sizeof(int) + nt._nvdata * sizeof(void*); nb_nt += nt.end * sizeof(int); // _v_parent_index // network connectivity nb_nt += nt.n_pntproc * sz_pntproc + nt.n_netcon * sz_netcon + nt.n_presyn * sz_presyn + nt.n_input_presyn * sz_input_presyn + nt.n_weight * sizeof(double); nbyte += nb_nt; #ifdef DEBUG printf("ncell=%d end=%d nmech=%d\n", nt.ncell, nt.end, nmech); printf("ndata=%ld nidata=%ld nvdata=%ld\n", nt._ndata, nt._nidata, nt._nvdata); printf("nbyte so far %ld\n", nb_nt); printf("n_presyn = %d sz=%ld nbyte=%ld\n", nt.n_presyn, sz_presyn, nt.n_presyn * sz_presyn); printf("n_input_presyn = %d sz=%ld nbyte=%ld\n", nt.n_input_presyn, sz_input_presyn, nt.n_input_presyn * sz_input_presyn); printf("n_pntproc=%d sz=%ld nbyte=%ld\n", nt.n_pntproc, sz_pntproc, nt.n_pntproc * sz_pntproc); printf("n_netcon=%d sz=%ld nbyte=%ld\n", nt.n_netcon, sz_netcon, nt.n_netcon * sz_netcon); printf("n_weight = %d\n", nt.n_weight); printf("%d thread %d total bytes %ld\n", nrnmpi_myid, i, nb_nt); #endif if (detailed_report) { size_data[0] += nt.ncell; size_data[1] += nt.end; size_data[2] += nmech; size_data[3] += nt._ndata; size_data[4] += nt._nidata; 
size_data[5] += nt._nvdata; size_data[6] += nt.n_presyn; size_data[7] += nt.n_input_presyn; size_data[8] += nt.n_pntproc; size_data[9] += nt.n_netcon; size_data[10] += nt.n_weight; size_data[11] += nb_nt; } } nbyte += nccnt * sizeof(NetCon*); nbyte += output_presyn_size(); nbyte += input_presyn_size(); nbyte += nrnran123_instance_count() * nrnran123_state_size(); #ifdef DEBUG printf("%d netcon pointers %ld nbyte=%ld\n", nrnmpi_myid, nccnt, nccnt * sizeof(NetCon*)); printf("nrnran123 size=%ld cnt=%ld nbyte=%ld\n", nrnran123_state_size(), nrnran123_instance_count(), nrnran123_instance_count() * nrnran123_state_size()); printf("%d total bytes %ld\n", nrnmpi_myid, nbyte); #endif if (detailed_report) { size_data[12] = nbyte; #if NRNMPI if (corenrn_param.mpi_enable) { // last arg is op type where 1 is sum, 2 is max and any other value is min nrnmpi_long_allreduce_vec(&size_data[0], &global_size_data_sum[0], 13, 1); nrnmpi_long_allreduce_vec(&size_data[0], &global_size_data_max[0], 13, 2); nrnmpi_long_allreduce_vec(&size_data[0], &global_size_data_min[0], 13, 3); for (int i = 0; i < 13; i++) { global_size_data_avg[i] = global_size_data_sum[i] / float(nrnmpi_numprocs); } } else #endif { global_size_data_max = size_data; global_size_data_min = size_data; global_size_data_avg.assign(size_data.cbegin(), size_data.cend()); } // now print the collected data: if (nrnmpi_myid == 0) { printf("Memory size information for all NrnThreads per rank\n"); printf("------------------------------------------------------------------\n"); printf("%22s %12s %12s %12s\n", "field", "min", "max", "avg"); printf("%22s %12ld %12ld %15.2f\n", "n_cell", global_size_data_min[0], global_size_data_max[0], global_size_data_avg[0]); printf("%22s %12ld %12ld %15.2f\n", "n_compartment", global_size_data_min[1], global_size_data_max[1], global_size_data_avg[1]); printf("%22s %12ld %12ld %15.2f\n", "n_mechanism", global_size_data_min[2], global_size_data_max[2], global_size_data_avg[2]); printf("%22s %12ld 
%12ld %15.2f\n", "_ndata", global_size_data_min[3], global_size_data_max[3], global_size_data_avg[3]); printf("%22s %12ld %12ld %15.2f\n", "_nidata", global_size_data_min[4], global_size_data_max[4], global_size_data_avg[4]); printf("%22s %12ld %12ld %15.2f\n", "_nvdata", global_size_data_min[5], global_size_data_max[5], global_size_data_avg[5]); printf("%22s %12ld %12ld %15.2f\n", "n_presyn", global_size_data_min[6], global_size_data_max[6], global_size_data_avg[6]); printf("%22s %12ld %12ld %15.2f\n", "n_presyn (bytes)", global_size_data_min[6] * sz_presyn, global_size_data_max[6] * sz_presyn, global_size_data_avg[6] * sz_presyn); printf("%22s %12ld %12ld %15.2f\n", "n_input_presyn", global_size_data_min[7], global_size_data_max[7], global_size_data_avg[7]); printf("%22s %12ld %12ld %15.2f\n", "n_input_presyn (bytes)", global_size_data_min[7] * sz_input_presyn, global_size_data_max[7] * sz_input_presyn, global_size_data_avg[7] * sz_input_presyn); printf("%22s %12ld %12ld %15.2f\n", "n_pntproc", global_size_data_min[8], global_size_data_max[8], global_size_data_avg[8]); printf("%22s %12ld %12ld %15.2f\n", "n_pntproc (bytes)", global_size_data_min[8] * sz_pntproc, global_size_data_max[8] * sz_pntproc, global_size_data_avg[8] * sz_pntproc); printf("%22s %12ld %12ld %15.2f\n", "n_netcon", global_size_data_min[9], global_size_data_max[9], global_size_data_avg[9]); printf("%22s %12ld %12ld %15.2f\n", "n_netcon (bytes)", global_size_data_min[9] * sz_netcon, global_size_data_max[9] * sz_netcon, global_size_data_avg[9] * sz_netcon); printf("%22s %12ld %12ld %15.2f\n", "n_weight", global_size_data_min[10], global_size_data_max[10], global_size_data_avg[10]); printf("%22s %12ld %12ld %15.2f\n", "NrnThread (bytes)", global_size_data_min[11], global_size_data_max[11], global_size_data_avg[11]); printf("%22s %12ld %12ld %15.2f\n", "model size (bytes)", global_size_data_min[12], global_size_data_max[12], global_size_data_avg[12]); } } #if NRNMPI if (corenrn_param.mpi_enable) { 
long global_nbyte = 0; nrnmpi_long_allreduce_vec(&nbyte, &global_nbyte, 1, 1); nbyte = global_nbyte; } #endif return nbyte; } } // namespace coreneuron ================================================ FILE: coreneuron/io/nrn_setup.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/nrn_filehandler.hpp" #include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/io/user_params.hpp" #include "coreneuron/io/mem_layout_util.hpp" #include "coreneuron/io/nrn_checkpoint.hpp" namespace coreneuron { void read_phase1(NrnThread& nt, UserParams& userParams); void read_phase2(NrnThread& nt, UserParams& userParams); void read_phase3(NrnThread& nt, UserParams& userParams); void read_phasegap(NrnThread& nt, UserParams& userParams); void setup_ThreadData(NrnThread& nt); void nrn_setup(const char* filesdat, bool is_mapping_needed, CheckPoints& checkPoints, bool run_setup_cleanup = true, const char* datapath = "", const char* restore_path = "", double* mindelay = nullptr); // Functions to load and clean data; extern void nrn_init_and_load_data(int argc, char** argv, CheckPoints& checkPoints, bool is_mapping_needed = false, bool run_setup_cleanup = true); extern void allocate_data_in_mechanism_nrn_init(); extern void nrn_setup_cleanup(); extern int nrn_i_layout(int i, int cnt, int j, int size, int layout); size_t memb_list_size(NrnThreadMembList* tml, bool include_data); size_t model_size(bool detailed_report); namespace coreneuron { /// Reading phase number. enum phase { one = 1, two, three, gap }; /// Get the phase number in form of the string. 
template inline std::string getPhaseName(); template <> inline std::string getPhaseName() { return "1"; } template <> inline std::string getPhaseName() { return "2"; } template <> inline std::string getPhaseName() { return "3"; } template <> inline std::string getPhaseName() { return "gap"; } /// Reading phase selector. template inline void read_phase_aux(NrnThread& nt, UserParams&); template <> inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { read_phase1(nt, userParams); } template <> inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { read_phase2(nt, userParams); } template <> inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { read_phase3(nt, userParams); } template <> inline void read_phase_aux(NrnThread& nt, UserParams& userParams) { read_phasegap(nt, userParams); } /// Reading phase wrapper for each neuron group. template inline void* phase_wrapper_w(NrnThread* nt, UserParams& userParams, bool in_memory_transfer) { int i = nt->id; if (i < userParams.ngroup) { if (!in_memory_transfer) { const char* data_dir = userParams.path; // directory to read could be different for phase 2 if we are restoring // all other phases still read from dataset directory because the data // is constant if (P == 2) { data_dir = userParams.restore_path; } std::string fname = std::string(data_dir) + "/" + std::to_string(userParams.gidgroups[i]) + "_" + getPhaseName

() + ".dat"; // Avoid trying to open the gid_gap.dat file if it doesn't exist when there are no // gap junctions in this gid. // Note that we still need to close `userParams.file_reader[i]` // because files are opened in the order of `gid_1.dat`, `gid_2.dat` and `gid_gap.dat`. // When we open next file, `gid_gap.dat` in this case, we are supposed to close the // handle for `gid_2.dat` even though file doesn't exist. if (P == gap && !FileHandler::file_exist(fname)) { userParams.file_reader[i].close(); } else { // if no file failed to open or not opened at all userParams.file_reader[i].open(fname); } } read_phase_aux

(*nt, userParams); if (!in_memory_transfer) { userParams.file_reader[i].close(); } if (P == 2) { setup_ThreadData(*nt); } } return nullptr; } /// Specific phase reading executed by threads. template inline static void phase_wrapper(UserParams& userParams, int direct = 0) { nrn_multithread_job(phase_wrapper_w

, userParams, direct != 0); } } // namespace coreneuron } // namespace coreneuron ================================================ FILE: coreneuron/io/nrnsection_mapping.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include #include #include #include #include #include namespace coreneuron { /** type to store every section and associated segments */ using segvec_type = std::vector; using secseg_map_type = std::map; using secseg_it_type = secseg_map_type::iterator; /** @brief Section to segment mapping * * For a section list (of a particulat type), store mapping * of section to segments * a section is a arbitrary user classification to recognize some segments (ex: api, soma, dend, * axon) * */ struct SecMapping { /** name of section list */ std::string name; /** map of section and associated segments */ secseg_map_type secmap; SecMapping() = default; explicit SecMapping(std::string s) : name(std::move(s)) {} /** @brief return total number of sections in section list */ size_t num_sections() const noexcept { return secmap.size(); } /** @brief return number of segments in section list */ size_t num_segments() const { return std::accumulate(secmap.begin(), secmap.end(), 0, [](int psum, const auto& item) { return psum + item.second.size(); }); } /** @brief add section to associated segment */ void add_segment(int sec, int seg) { secmap[sec].push_back(seg); } }; /** @brief Compartment mapping information for a cell * * A cell can have multiple section list types like * soma, axon, apic, dend etc. User will add these * section lists using HOC interface. 
*/
struct CellMapping {
    /** gid of a cell */
    int gid;

    /** list of section lists (like soma, axon, apic); the pointers are owned
     *  by this object and deleted in the destructor */
    std::vector<SecMapping*> secmapvec;

    CellMapping(int g)
        : gid(g) {}

    /** Copying would make two objects delete the same SecMapping pointers,
     *  so it is disabled. */
    CellMapping(const CellMapping&) = delete;
    CellMapping& operator=(const CellMapping&) = delete;

    /** @brief total number of sections in a cell */
    int num_sections() const {
        return std::accumulate(secmapvec.begin(),
                               secmapvec.end(),
                               0,
                               [](int psum, const auto& secmap) {
                                   return psum + secmap->num_sections();
                               });
    }

    /** @brief return number of segments in a cell */
    int num_segments() const {
        return std::accumulate(secmapvec.begin(),
                               secmapvec.end(),
                               0,
                               [](int psum, const auto& secmap) {
                                   return psum + secmap->num_segments();
                               });
    }

    /** @brief number of section lists */
    size_t size() const noexcept {
        return secmapvec.size();
    }

    /** @brief add new SecMapping; ownership of `s` is taken over */
    void add_sec_map(SecMapping* s) {
        secmapvec.push_back(s);
    }

    /** @brief return section list mapping with given name
     *
     *  Prints a warning to std::cout and returns nullptr when no section
     *  list has that name.
     */
    SecMapping* get_seclist_mapping(const std::string& name) const {
        for (auto& secmap: secmapvec) {
            if (name == secmap->name) {
                return secmap;
            }
        }

        std::cout << "Warning: Section mapping list " << name << " doesn't exist! \n";
        return nullptr;
    }

    /** @brief return segment count for specific section list with given name
     *  (0 when the section list does not exist) */
    size_t get_seclist_segment_count(const std::string& name) const {
        SecMapping* s = get_seclist_mapping(name);
        size_t count = 0;
        if (s) {
            count = s->num_segments();
        }
        return count;
    }

    /** @brief return section count for specific section list with given name
     *  (0 when the section list does not exist) */
    size_t get_seclist_section_count(const std::string& name) const {
        SecMapping* s = get_seclist_mapping(name);
        size_t count = 0;
        if (s) {
            count = s->num_sections();
        }
        return count;
    }

    /** @brief memory cleanup: delete every owned SecMapping */
    ~CellMapping() {
        for (SecMapping* secmap: secmapvec) {
            delete secmap;
        }
    }
};

/** @brief Compartment mapping information for NrnThread
 *
 * NrnThread could have more than one cell in cellgroup
 * and we store this in vector.
*/ struct NrnThreadMappingInfo { /** list of cells mapping */ std::vector mappingvec; /** @brief number of cells */ size_t size() const { return mappingvec.size(); } /** @brief memory cleanup */ ~NrnThreadMappingInfo() { for (size_t i = 0; i < mappingvec.size(); i++) { delete mappingvec[i]; } } /** @brief get cell mapping information for given gid * if exist otherwise return nullptr. */ CellMapping* get_cell_mapping(int gid) const { for (const auto& mapping: mappingvec) { if (mapping->gid == gid) { return mapping; } } return nullptr; } /** @brief add mapping information of new cell */ void add_cell_mapping(CellMapping* c) { mappingvec.push_back(c); } }; } // namespace coreneuron ================================================ FILE: coreneuron/io/output_spikes.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include #include #include // std::lenght_error #include #include #include #include #include "coreneuron/nrnconf.h" #include "coreneuron/io/nrn2core_direct.h" #include "coreneuron/io/output_spikes.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/utils/nrnmutdec.hpp" #include "coreneuron/mpi/nrnmpidec.h" #include "coreneuron/utils/string_utils.h" #include "coreneuron/apps/corenrn_parameters.hpp" #ifdef ENABLE_SONATA_REPORTS #include "bbp/sonata/reports.h" #endif // ENABLE_SONATA_REPORTS /** * @brief Return all spike vectors to NEURON * * @param spiketvec - vector of spikes at the end of CORENEURON simulation * @param spikegidvec - vector of gids at the end of CORENEURON simulation * @return true if we are in embedded_run and NEURON has successfully retrieved the vectors */ static bool all_spikes_return(std::vector& spiketvec, std::vector& spikegidvec) { return corenrn_embedded && nrn2core_all_spike_vectors_return_ && (*nrn2core_all_spike_vectors_return_)(spiketvec, spikegidvec); } namespace coreneuron { /// --> Coreneuron as SpikeBuffer class std::vector spikevec_time; std::vector spikevec_gid; static OMP_Mutex mut; void mk_spikevec_buffer(int sz) { try { spikevec_time.reserve(sz); spikevec_gid.reserve(sz); } catch (const std::length_error& le) { std::cerr << "Lenght error" << le.what() << std::endl; } } void spikevec_lock() { mut.lock(); } void spikevec_unlock() { mut.unlock(); } static void local_spikevec_sort(std::vector& isvect, std::vector& isvecg, std::vector& osvect, std::vector& osvecg) { osvect.resize(isvect.size()); osvecg.resize(isvecg.size()); // first build a permutation vector std::vector perm(isvect.size()); std::iota(perm.begin(), perm.end(), 0); // sort by gid (second predicate first) std::stable_sort(perm.begin(), perm.end(), [&](std::size_t i, std::size_t j) { return isvecg[i] < isvecg[j]; }); // then sort by time std::stable_sort(perm.begin(), perm.end(), [&](std::size_t i, 
std::size_t j) { return isvect[i] < isvect[j]; }); // now apply permutation to time and gid output vectors std::transform(perm.begin(), perm.end(), osvect.begin(), [&](std::size_t i) { return isvect[i]; }); std::transform(perm.begin(), perm.end(), osvecg.begin(), [&](std::size_t i) { return isvecg[i]; }); } #if NRNMPI static void sort_spikes(std::vector& spikevec_time, std::vector& spikevec_gid) { double lmin_time = std::numeric_limits::max(); double lmax_time = std::numeric_limits::min(); if (!spikevec_time.empty()) { lmin_time = *(std::min_element(spikevec_time.begin(), spikevec_time.end())); lmax_time = *(std::max_element(spikevec_time.begin(), spikevec_time.end())); } double min_time = nrnmpi_dbl_allmin(lmin_time); double max_time = nrnmpi_dbl_allmax(lmax_time); // allocate send and receive counts and displacements for MPI_Alltoallv std::vector snd_cnts(nrnmpi_numprocs); std::vector rcv_cnts(nrnmpi_numprocs); std::vector snd_dsps(nrnmpi_numprocs); std::vector rcv_dsps(nrnmpi_numprocs); double bin_t = (max_time - min_time) / nrnmpi_numprocs; bin_t = bin_t ? 
bin_t : 1; // first find number of spikes in each time window for (const auto& st: spikevec_time) { int idx = (int) (st - min_time) / bin_t; snd_cnts[idx]++; } for (int i = 1; i < nrnmpi_numprocs; i++) { snd_dsps[i] = snd_dsps[i - 1] + snd_cnts[i - 1]; } // now let each rank know how many spikes they will receive // and get in turn all the buffer sizes to receive nrnmpi_int_alltoall(&snd_cnts[0], &rcv_cnts[0], 1); for (int i = 1; i < nrnmpi_numprocs; i++) { rcv_dsps[i] = rcv_dsps[i - 1] + rcv_cnts[i - 1]; } std::size_t new_sz = 0; for (const auto& r: rcv_cnts) { new_sz += r; } // prepare new sorted vectors std::vector svt_buf(new_sz, 0.0); std::vector svg_buf(new_sz, 0); // now exchange data nrnmpi_dbl_alltoallv(spikevec_time.data(), &snd_cnts[0], &snd_dsps[0], svt_buf.data(), &rcv_cnts[0], &rcv_dsps[0]); nrnmpi_int_alltoallv(spikevec_gid.data(), &snd_cnts[0], &snd_dsps[0], svg_buf.data(), &rcv_cnts[0], &rcv_dsps[0]); local_spikevec_sort(svt_buf, svg_buf, spikevec_time, spikevec_gid); } #ifdef ENABLE_SONATA_REPORTS /** Split spikevec_time and spikevec_gid by populations * Add spike data with population name and gid offset tolibsonatareport API */ void output_spike_populations(const SpikesInfo& spikes_info) { // Write spikes with default population name and offset if (spikes_info.population_info.empty()) { sonata_add_spikes_population("All", 0, spikevec_time.data(), spikevec_time.size(), spikevec_gid.data(), spikevec_gid.size()); return; } int n_populations = spikes_info.population_info.size(); for (int idx = 0; idx < n_populations; idx++) { const auto& curr_pop = spikes_info.population_info[idx]; std::string population_name = curr_pop.first; int population_offset = curr_pop.second; int gid_lower = population_offset; int gid_upper = std::numeric_limits::max(); if (idx != n_populations - 1) { gid_upper = spikes_info.population_info[idx + 1].second - 1; } std::vector pop_spikevec_time; std::vector pop_spikevec_gid; for (int j = 0; j < spikevec_gid.size(); j++) { if 
(spikevec_gid[j] >= gid_lower && spikevec_gid[j] <= gid_upper) { pop_spikevec_time.push_back(spikevec_time[j]); pop_spikevec_gid.push_back(spikevec_gid[j]); } } sonata_add_spikes_population(population_name.data(), population_offset, pop_spikevec_time.data(), pop_spikevec_time.size(), pop_spikevec_gid.data(), pop_spikevec_gid.size()); } } #endif // ENABLE_SONATA_REPORTS /** Write generated spikes to out.dat using mpi parallel i/o. * \todo : MPI related code should be factored into nrnmpi.c * Check spike record length which is set to 64 chars */ static void output_spikes_parallel(const char* outpath, const SpikesInfo& spikes_info) { std::stringstream ss; ss << outpath << "/out.dat"; std::string fname = ss.str(); // remove if file already exist if (nrnmpi_myid == 0) { remove(fname.c_str()); } #ifdef ENABLE_SONATA_REPORTS sonata_create_spikefile(outpath, spikes_info.file_name.data()); output_spike_populations(spikes_info); sonata_write_spike_populations(); sonata_close_spikefile(); #endif // ENABLE_SONATA_REPORTS sort_spikes(spikevec_time, spikevec_gid); nrnmpi_barrier(); // each spike record in the file is time + gid (64 chars sufficient) const int SPIKE_RECORD_LEN = 64; size_t num_spikes = spikevec_gid.size(); size_t num_bytes = (sizeof(char) * num_spikes * SPIKE_RECORD_LEN); char* spike_data = (char*) malloc(num_bytes); if (spike_data == nullptr) { printf("Error while writing spikes due to memory allocation\n"); return; } // empty if no spikes strcpy(spike_data, ""); // populate buffer with all spike entries char spike_entry[SPIKE_RECORD_LEN]; size_t spike_data_offset = 0; for (size_t i = 0; i < num_spikes; i++) { int spike_entry_chars = snprintf(spike_entry, 64, "%.8g\t%d\n", spikevec_time[i], spikevec_gid[i]); spike_data_offset = strcat_at_pos(spike_data, spike_data_offset, spike_entry, spike_entry_chars); } // calculate offset into global file. 
note that we don't write // all num_bytes but only "populated" buffer size_t num_chars = strlen(spike_data); nrnmpi_write_file(fname, spike_data, num_chars); free(spike_data); } #endif static void output_spikes_serial(const char* outpath) { std::stringstream ss; ss << outpath << "/out.dat"; std::string fname = ss.str(); // reserve some space for sorted spikevec buffers std::vector sorted_spikevec_time(spikevec_time.size()); std::vector sorted_spikevec_gid(spikevec_gid.size()); local_spikevec_sort(spikevec_time, spikevec_gid, sorted_spikevec_time, sorted_spikevec_gid); // remove if file already exist remove(fname.c_str()); FILE* f = fopen(fname.c_str(), "w"); if (!f && nrnmpi_myid == 0) { std::cout << "WARNING: Could not open file for writing spikes." << std::endl; return; } for (std::size_t i = 0; i < sorted_spikevec_gid.size(); ++i) if (sorted_spikevec_gid[i] > -1) fprintf(f, "%.8g\t%d\n", sorted_spikevec_time[i], sorted_spikevec_gid[i]); fclose(f); } void output_spikes(const char* outpath, const SpikesInfo& spikes_info) { // try to transfer spikes to NEURON. 
If successfull, don't write out.dat if (all_spikes_return(spikevec_time, spikevec_gid)) { clear_spike_vectors(); return; } #if NRNMPI if (corenrn_param.mpi_enable && nrnmpi_initialized()) { output_spikes_parallel(outpath, spikes_info); } else #endif { output_spikes_serial(outpath); } clear_spike_vectors(); } void clear_spike_vectors() { auto spikevec_time_capacity = spikevec_time.capacity(); auto spikevec_gid_capacity = spikevec_gid.capacity(); spikevec_time.clear(); spikevec_gid.clear(); spikevec_time.reserve(spikevec_time_capacity); spikevec_gid.reserve(spikevec_gid_capacity); } void validation(std::vector>& res) { for (unsigned i = 0; i < spikevec_gid.size(); ++i) if (spikevec_gid[i] > -1) res.push_back(std::make_pair(spikevec_time[i], spikevec_gid[i])); } } // namespace coreneuron ================================================ FILE: coreneuron/io/output_spikes.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include #include #include #include "coreneuron/io/reports/nrnreport.hpp" namespace coreneuron { void output_spikes(const char* outpath, const SpikesInfo& spikes_info); void mk_spikevec_buffer(int); extern std::vector spikevec_time; extern std::vector spikevec_gid; void clear_spike_vectors(); void validation(std::vector>& res); void spikevec_lock(); void spikevec_unlock(); } // namespace coreneuron ================================================ FILE: coreneuron/io/phase1.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/phase1.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" int (*nrn2core_get_dat1_)(int tid, int& n_presyn, int& n_netcon, int*& output_gid, int*& netcon_srcgid, std::vector& netcon_negsrcgid_tid); namespace coreneuron { Phase1::Phase1(FileHandler& F) { assert(!F.fail()); int n_presyn = F.read_int(); /// Number of PreSyn-s in NrnThread nt int n_netcon = F.read_int(); /// Number of NetCon-s in NrnThread nt this->output_gids = F.read_vector(n_presyn); this->netcon_srcgids = F.read_vector(n_netcon); // For file mode transfer, it is not allowed that negative gids exist // in different threads. So this->netcon_tids remains clear. F.close(); } Phase1::Phase1(int thread_id) { int* output_gids; int* netcon_srcgid; int n_presyn; int n_netcon; // TODO : check error codes for NEURON - CoreNEURON communication int valid = (*nrn2core_get_dat1_)( thread_id, n_presyn, n_netcon, output_gids, netcon_srcgid, this->netcon_negsrcgid_tid); if (!valid) { return; } this->output_gids = std::vector(output_gids, output_gids + n_presyn); delete[] output_gids; this->netcon_srcgids = std::vector(netcon_srcgid, netcon_srcgid + n_netcon); delete[] netcon_srcgid; } void Phase1::populate(NrnThread& nt, OMP_Mutex& mut) { nt.n_presyn = this->output_gids.size(); nt.n_netcon = this->netcon_srcgids.size(); nrnthreads_netcon_srcgid[nt.id] = new int[nt.n_netcon]; std::copy(this->netcon_srcgids.begin(), this->netcon_srcgids.end(), nrnthreads_netcon_srcgid[nt.id]); // netcon_negsrcgid_tid is empty if file transfer or single thread coreneuron::nrnthreads_netcon_negsrcgid_tid[nt.id] = this->netcon_negsrcgid_tid; nt.netcons = new NetCon[nt.n_netcon]; if (nt.n_presyn) { nt.presyns_helper = (PreSynHelper*) ecalloc_align(nt.n_presyn, sizeof(PreSynHelper)); nt.presyns = new PreSyn[nt.n_presyn]; } PreSyn* ps = 
nt.presyns; /// go through all presyns for (auto& gid: this->output_gids) { if (gid == -1) { ++ps; continue; } { const std::lock_guard lock(mut); // Note that the negative (type, index) // coded information goes into the neg_gid2out[tid] hash table. // See netpar.cpp for the netpar_tid_... function implementations. // Both that table and the process wide gid2out table can be deleted // before the end of setup /// Put gid into the gid2out hash table with correspondent output PreSyn /// Or to the negative PreSyn map if (gid >= 0) { char m[200]; if (gid2in.find(gid) != gid2in.end()) { sprintf(m, "gid=%d already exists as an input port", gid); hoc_execerror(m, "Setup all the output ports on this process before using them as " "input ports."); } if (gid2out.find(gid) != gid2out.end()) { sprintf(m, "gid=%d already exists on this process as an output port", gid); hoc_execerror(m, 0); } ps->gid_ = gid; ps->output_index_ = gid; gid2out[gid] = ps; } else { nrn_assert(neg_gid2out[nt.id].find(gid) == neg_gid2out[nt.id].end()); ps->output_index_ = -1; neg_gid2out[nt.id][gid] = ps; } } // end of the mutex ++ps; } } } // namespace coreneuron ================================================ FILE: coreneuron/io/phase1.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once #include #include "coreneuron/io/nrn_filehandler.hpp" #include "coreneuron/utils/nrnmutdec.hpp" namespace coreneuron { struct NrnThread; class Phase1 { public: Phase1(FileHandler& F); Phase1(int thread_id); void populate(NrnThread& nt, OMP_Mutex& mut); private: std::vector output_gids; std::vector netcon_srcgids; std::vector netcon_negsrcgid_tid; // entries only for negative srcgids }; } // namespace coreneuron ================================================ FILE: coreneuron/io/phase2.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/io/phase2.hpp" #include "coreneuron/coreneuron.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/nrn_checkpoint.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/permute/data_layout.hpp" #include "coreneuron/permute/node_permute.h" #include "coreneuron/utils/utils.hpp" #include "coreneuron/utils/vrecitem.h" #include "coreneuron/io/mem_layout_util.hpp" #include "coreneuron/io/setup_fornetcon.hpp" #if defined(_OPENMP) #include #endif int (*nrn2core_get_dat2_1_)(int tid, int& n_real_cell, int& ngid, int& n_real_gid, int& nnode, int& ndiam, int& nmech, int*& tml_index, int*& ml_nodecount, int& nidata, int& nvdata, int& nweight); int (*nrn2core_get_dat2_2_)(int tid, int*& v_parent_index, double*& a, double*& b, double*& area, double*& v, double*& diamvec); int (*nrn2core_get_dat2_mech_)(int tid, size_t i, int dsz_inst, int*& nodeindices, double*& data, int*& pdata, std::vector& pointer2type); int (*nrn2core_get_dat2_3_)(int tid, int nweight, int*& output_vindex, double*& 
output_threshold, int*& netcon_pnttype, int*& netcon_pntindex, double*& weights, double*& delays); int (*nrn2core_get_dat2_corepointer_)(int tid, int& n); int (*nrn2core_get_dat2_corepointer_mech_)(int tid, int type, int& icnt, int& dcnt, int*& iarray, double*& darray); int (*nrn2core_get_dat2_vecplay_)(int tid, std::vector& indices); int (*nrn2core_get_dat2_vecplay_inst_)(int tid, int i, int& vptype, int& mtype, int& ix, int& sz, double*& yvec, double*& tvec, int& last_index, int& discon_index, int& ubound_index); namespace coreneuron { template inline void mech_data_layout_transform(T* data, int cnt, int sz, int layout) { if (layout == Layout::AoS) { return; } // layout is equal to Layout::SoA int align_cnt = nrn_soa_padded_size(cnt, layout); std::vector d(cnt * sz); // copy matrix for (int i = 0; i < cnt; ++i) { for (int j = 0; j < sz; ++j) { d[i * sz + j] = data[i * sz + j]; } } // transform memory layout for (int i = 0; i < cnt; ++i) { for (int j = 0; j < sz; ++j) { data[i + j * align_cnt] = d[i * sz + j]; } } } void Phase2::read_file(FileHandler& F, const NrnThread& nt) { n_real_cell = F.read_int(); n_output = F.read_int(); n_real_output = F.read_int(); n_node = F.read_int(); n_diam = F.read_int(); n_mech = F.read_int(); mech_types = std::vector(n_mech, 0); nodecounts = std::vector(n_mech, 0); for (int i = 0; i < n_mech; ++i) { mech_types[i] = F.read_int(); nodecounts[i] = F.read_int(); } // check mechanism compatibility before reading data check_mechanism(); n_idata = F.read_int(); n_vdata = F.read_int(); int n_weight = F.read_int(); v_parent_index = (int*) ecalloc_align(n_node, sizeof(int)); F.read_array(v_parent_index, n_node); int n_data_padded = nrn_soa_padded_size(n_node, SOA_LAYOUT); { { // Compute size of _data and allocate int n_data = 6 * n_data_padded; if (n_diam > 0) { n_data += n_data_padded; } for (int i = 0; i < n_mech; ++i) { int layout = corenrn.get_mech_data_layout()[mech_types[i]]; int n = nodecounts[i]; int sz = 
corenrn.get_prop_param_size()[mech_types[i]]; n_data = nrn_soa_byte_align(n_data); n_data += nrn_soa_padded_size(n, layout) * sz; } _data = (double*) ecalloc_align(n_data, sizeof(double)); } F.read_array(_data + 2 * n_data_padded, n_node); F.read_array(_data + 3 * n_data_padded, n_node); F.read_array(_data + 5 * n_data_padded, n_node); F.read_array(_data + 4 * n_data_padded, n_node); if (n_diam > 0) { F.read_array(_data + 6 * n_data_padded, n_node); } } size_t offset = 6 * n_data_padded; if (n_diam > 0) { offset += n_data_padded; } for (int i = 0; i < n_mech; ++i) { int layout = corenrn.get_mech_data_layout()[mech_types[i]]; int n = nodecounts[i]; int sz = corenrn.get_prop_param_size()[mech_types[i]]; int dsz = corenrn.get_prop_dparam_size()[mech_types[i]]; offset = nrn_soa_byte_align(offset); std::vector nodeindices; if (!corenrn.get_is_artificial()[mech_types[i]]) { nodeindices = F.read_vector(n); } F.read_array(_data + offset, sz * n); offset += nrn_soa_padded_size(n, layout) * sz; std::vector pdata; if (dsz > 0) { pdata = F.read_vector(dsz * n); } tmls.emplace_back(TML{nodeindices, pdata, mech_types[i], {}, {}}); if (dsz > 0) { int sz = F.read_int(); if (sz) { auto& p2t = tmls.back().pointer2type; p2t = F.read_vector(sz); } } } output_vindex = F.read_vector(nt.n_presyn); output_threshold = F.read_vector(n_real_output); pnttype = F.read_vector(nt.n_netcon); pntindex = F.read_vector(nt.n_netcon); weights = F.read_vector(n_weight); delay = F.read_vector(nt.n_netcon); num_point_process = F.read_int(); for (int i = 0; i < n_mech; ++i) { if (!corenrn.get_bbcore_read()[mech_types[i]]) { continue; } tmls[i].type = F.read_int(); int icnt = F.read_int(); int dcnt = F.read_int(); if (icnt > 0) { tmls[i].iArray = F.read_vector(icnt); } if (dcnt > 0) { tmls[i].dArray = F.read_vector(dcnt); } } int n_vec_play_continuous = F.read_int(); vec_play_continuous.reserve(n_vec_play_continuous); for (int i = 0; i < n_vec_play_continuous; ++i) { VecPlayContinuous_ item; item.vtype = 
F.read_int(); item.mtype = F.read_int(); item.ix = F.read_int(); int sz = F.read_int(); item.yvec = IvocVect(sz); item.tvec = IvocVect(sz); F.read_array(item.yvec.data(), sz); F.read_array(item.tvec.data(), sz); vec_play_continuous.push_back(std::move(item)); } // store current checkpoint state to continue reading mapping // The checkpoint numbering in phase 3 is a continuing of phase 2, and so will be restored F.record_checkpoint(); if (F.eof()) return; nrn_assert(F.read_int() == n_vec_play_continuous); for (int i = 0; i < n_vec_play_continuous; ++i) { auto& vecPlay = vec_play_continuous[i]; vecPlay.last_index = F.read_int(); vecPlay.discon_index = F.read_int(); vecPlay.ubound_index = F.read_int(); } patstim_index = F.read_int(); nrn_assert(F.read_int() == -1); for (int i = 0; i < nt.n_presyn; ++i) { preSynConditionEventFlags.push_back(F.read_int()); } nrn_assert(F.read_int() == -1); restore_events(F); nrn_assert(F.read_int() == -1); restore_events(F); } void Phase2::read_direct(int thread_id, const NrnThread& nt) { int* types_ = nullptr; int* nodecounts_ = nullptr; int n_weight; (*nrn2core_get_dat2_1_)(thread_id, n_real_cell, n_output, n_real_output, n_node, n_diam, n_mech, types_, nodecounts_, n_idata, n_vdata, n_weight); mech_types = std::vector(types_, types_ + n_mech); delete[] types_; nodecounts = std::vector(nodecounts_, nodecounts_ + n_mech); delete[] nodecounts_; check_mechanism(); // TODO: fix it in the future int n_data_padded = nrn_soa_padded_size(n_node, SOA_LAYOUT); int n_data = 6 * n_data_padded; if (n_diam > 0) { n_data += n_data_padded; } for (int i = 0; i < n_mech; ++i) { int layout = corenrn.get_mech_data_layout()[mech_types[i]]; int n = nodecounts[i]; int sz = corenrn.get_prop_param_size()[mech_types[i]]; n_data = nrn_soa_byte_align(n_data); n_data += nrn_soa_padded_size(n, layout) * sz; } _data = (double*) ecalloc_align(n_data, sizeof(double)); v_parent_index = (int*) ecalloc_align(n_node, sizeof(int)); double* actual_a = _data + 2 * 
n_data_padded; double* actual_b = _data + 3 * n_data_padded; double* actual_v = _data + 4 * n_data_padded; double* actual_area = _data + 5 * n_data_padded; double* actual_diam = n_diam > 0 ? _data + 6 * n_data_padded : nullptr; (*nrn2core_get_dat2_2_)( thread_id, v_parent_index, actual_a, actual_b, actual_area, actual_v, actual_diam); tmls.resize(n_mech); auto& param_sizes = corenrn.get_prop_param_size(); auto& dparam_sizes = corenrn.get_prop_dparam_size(); int dsz_inst = 0; size_t offset = 6 * n_data_padded; if (n_diam > 0) offset += n_data_padded; for (int i = 0; i < n_mech; ++i) { auto& tml = tmls[i]; int type = mech_types[i]; int layout = corenrn.get_mech_data_layout()[type]; offset = nrn_soa_byte_align(offset); tml.type = type; // artificial cell don't use nodeindices if (!corenrn.get_is_artificial()[type]) { tml.nodeindices.resize(nodecounts[i]); } tml.pdata.resize(nodecounts[i] * dparam_sizes[type]); int* nodeindices_ = nullptr; double* data_ = _data + offset; int* pdata_ = const_cast(tml.pdata.data()); (*nrn2core_get_dat2_mech_)(thread_id, i, dparam_sizes[type] > 0 ? dsz_inst : 0, nodeindices_, data_, pdata_, tml.pointer2type); if (dparam_sizes[type] > 0) dsz_inst++; offset += nrn_soa_padded_size(nodecounts[i], layout) * param_sizes[type]; if (nodeindices_) { std::copy(nodeindices_, nodeindices_ + nodecounts[i], tml.nodeindices.data()); free(nodeindices_); // not free_memory because this is allocated by NEURON? 
} if (corenrn.get_is_artificial()[type]) { assert(nodeindices_ == nullptr); } } int* output_vindex_ = nullptr; double* output_threshold_ = nullptr; int* pnttype_ = nullptr; int* pntindex_ = nullptr; double* weight_ = nullptr; double* delay_ = nullptr; (*nrn2core_get_dat2_3_)(thread_id, n_weight, output_vindex_, output_threshold_, pnttype_, pntindex_, weight_, delay_); output_vindex = std::vector(output_vindex_, output_vindex_ + nt.n_presyn); delete[] output_vindex_; output_threshold = std::vector(output_threshold_, output_threshold_ + n_real_output); delete[] output_threshold_; int n_netcon = nt.n_netcon; pnttype = std::vector(pnttype_, pnttype_ + n_netcon); delete[] pnttype_; pntindex = std::vector(pntindex_, pntindex_ + n_netcon); delete[] pntindex_; weights = std::vector(weight_, weight_ + n_weight); delete[] weight_; delay = std::vector(delay_, delay_ + n_netcon); delete[] delay_; (*nrn2core_get_dat2_corepointer_)(nt.id, num_point_process); for (int i = 0; i < n_mech; ++i) { // not all mod files have BBCOREPOINTER data to read if (!corenrn.get_bbcore_read()[mech_types[i]]) { continue; } int icnt; int* iArray_ = nullptr; int dcnt; double* dArray_ = nullptr; (*nrn2core_get_dat2_corepointer_mech_)(nt.id, tmls[i].type, icnt, dcnt, iArray_, dArray_); tmls[i].iArray.resize(icnt); std::copy(iArray_, iArray_ + icnt, tmls[i].iArray.begin()); delete[] iArray_; tmls[i].dArray.resize(dcnt); std::copy(dArray_, dArray_ + dcnt, tmls[i].dArray.begin()); delete[] dArray_; } // Get from NEURON, the VecPlayContinuous indices in // NetCvode::fixed_play_ for this thread. std::vector indices_vec_play_continuous; (*nrn2core_get_dat2_vecplay_)(thread_id, indices_vec_play_continuous); // i is an index into NEURON's NetCvode::fixed_play_ for this thread. 
for (auto i: indices_vec_play_continuous) { VecPlayContinuous_ item; // yvec_ and tvec_ are not deleted as that space is within // NEURON Vector double *yvec_, *tvec_; int sz; (*nrn2core_get_dat2_vecplay_inst_)(thread_id, i, item.vtype, item.mtype, item.ix, sz, yvec_, tvec_, item.last_index, item.discon_index, item.ubound_index); item.yvec = IvocVect(sz); item.tvec = IvocVect(sz); std::copy(yvec_, yvec_ + sz, item.yvec.data()); std::copy(tvec_, tvec_ + sz, item.tvec.data()); vec_play_continuous.push_back(std::move(item)); } } /// Check if MOD file used between NEURON and CoreNEURON is same void Phase2::check_mechanism() { int diff_mech_count = 0; for (int i = 0; i < n_mech; ++i) { if (std::any_of(corenrn.get_different_mechanism_type().begin(), corenrn.get_different_mechanism_type().end(), [&](int e) { return e == mech_types[i]; })) { if (nrnmpi_myid == 0) { printf("Error: %s is a different MOD file than used by NEURON!\n", nrn_get_mechname(mech_types[i])); } diff_mech_count++; } } if (diff_mech_count > 0) { if (nrnmpi_myid == 0) { printf( "Error : NEURON and CoreNEURON must use same mod files for compatibility, %d " "different mod file(s) found. Re-compile special and special-core!\n", diff_mech_count); nrn_abort(1); } } } /// Perform in memory transformation between AoS<>SoA for integer data void Phase2::transform_int_data(int elem0, int nodecount, int* pdata, int i, int dparam_size, int layout, int n_node_) { for (int iml = 0; iml < nodecount; ++iml) { int* pd = pdata + nrn_i_layout(iml, nodecount, i, dparam_size, layout); int ix = *pd; // relative to beginning of _actual_* nrn_assert((ix >= 0) && (ix < n_node_)); *pd = elem0 + ix; // relative to nt._data } } void Phase2::set_net_send_buffer(Memb_list** ml_list, const std::vector& pnt_offset) { // NetReceiveBuffering for (auto& net_buf_receive: corenrn.get_net_buf_receive()) { int type = net_buf_receive.second; // Does this thread have this type. 
Memb_list* ml = ml_list[type]; if (ml) { // needs a NetReceiveBuffer NetReceiveBuffer_t* nrb = (NetReceiveBuffer_t*) ecalloc_align(1, sizeof(NetReceiveBuffer_t)); assert(!ml->_net_receive_buffer); ml->_net_receive_buffer = nrb; nrb->_pnt_offset = pnt_offset[type]; // begin with a size equal to the number of instances, or at least 8 nrb->_size = std::max(8, ml->nodecount); nrb->_pnt_index = (int*) ecalloc_align(nrb->_size, sizeof(int)); nrb->_displ = (int*) ecalloc_align(nrb->_size + 1, sizeof(int)); nrb->_nrb_index = (int*) ecalloc_align(nrb->_size, sizeof(int)); nrb->_weight_index = (int*) ecalloc_align(nrb->_size, sizeof(int)); nrb->_nrb_t = (double*) ecalloc_align(nrb->_size, sizeof(double)); nrb->_nrb_flag = (double*) ecalloc_align(nrb->_size, sizeof(double)); } } // NetSendBuffering for (int type: corenrn.get_net_buf_send_type()) { // Does this thread have this type. Memb_list* ml = ml_list[type]; if (ml) { // needs a NetSendBuffer assert(!ml->_net_send_buffer); // begin with a size equal to twice number of instances NetSendBuffer_t* nsb = new NetSendBuffer_t(ml->nodecount * 2); ml->_net_send_buffer = nsb; } } } void Phase2::restore_events(FileHandler& F) { int type; while ((type = F.read_int()) != 0) { double time; F.read_array(&time, 1); switch (type) { case NetConType: { auto event = std::make_shared(); event->time = time; event->netcon_index = F.read_int(); events.emplace_back(type, event); break; } case SelfEventType: { auto event = std::make_shared(); event->time = time; event->target_type = F.read_int(); event->point_proc_instance = F.read_int(); event->target_instance = F.read_int(); F.read_array(&event->flag, 1); event->movable = F.read_int(); event->weight_index = F.read_int(); events.emplace_back(type, event); break; } case PreSynType: { auto event = std::make_shared(); event->time = time; event->presyn_index = F.read_int(); events.emplace_back(type, event); break; } case NetParEventType: { auto event = std::make_shared(); event->time = time; 
events.emplace_back(type, event); break; } case PlayRecordEventType: { auto event = std::make_shared(); event->time = time; event->play_record_type = F.read_int(); if (event->play_record_type == VecPlayContinuousType) { event->vecplay_index = F.read_int(); events.emplace_back(type, event); } else { nrn_assert(0); } break; } default: { nrn_assert(0); break; } } } } void Phase2::fill_before_after_lists(NrnThread& nt, const std::vector& memb_func) { /// Fill the BA lists std::vector before_after_map(memb_func.size()); for (int i = 0; i < BEFORE_AFTER_SIZE; ++i) { for (size_t ii = 0; ii < memb_func.size(); ++ii) { before_after_map[ii] = nullptr; } // Save first before-after block only. In case of multiple before-after blocks with the // same mech type, we will get subsequent ones using linked list below. for (auto bam = corenrn.get_bamech()[i]; bam; bam = bam->next) { if (!before_after_map[bam->type]) { before_after_map[bam->type] = bam; } } // necessary to keep in order wrt multiple BAMech with same mech type NrnThreadBAList** ptbl = nt.tbl + i; for (auto tml = nt.tml; tml; tml = tml->next) { if (before_after_map[tml->index]) { int mtype = tml->index; for (auto bam = before_after_map[mtype]; bam && bam->type == mtype; bam = bam->next) { auto tbl = (NrnThreadBAList*) emalloc(sizeof(NrnThreadBAList)); *ptbl = tbl; tbl->next = nullptr; tbl->bam = bam; tbl->ml = tml->ml; ptbl = &(tbl->next); } } } } } void Phase2::pdata_relocation(const NrnThread& nt, const std::vector& memb_func) { // Some pdata may index into data which has been reordered from AoS to // SoA. The four possibilities are if semantics is -1 (area), -5 (pointer), // -9 (diam), // or 0-999 (ion variables). // Note that pdata has a layout and the // type block in nt.data into which // it indexes, has a layout. // For faster search of tmls[i].type == type, use a map. // (perhaps would be better to replace tmls so that we can use tmls[type]. 
std::map type2itml; for (size_t i = 0; i < tmls.size(); ++i) { if (tmls[i].pointer2type.size()) { type2itml[tmls[i].type] = i; } } for (auto tml = nt.tml; tml; tml = tml->next) { int type = tml->index; int layout = corenrn.get_mech_data_layout()[type]; int* pdata = tml->ml->pdata; int cnt = tml->ml->nodecount; int szdp = corenrn.get_prop_dparam_size()[type]; int* semantics = memb_func[type].dparam_semantics; // compute only for ARTIFICIAL_CELL (has useful area pointer with semantics=-1) if (!corenrn.get_is_artificial()[type]) { if (szdp) { if (!semantics) continue; // temporary for HDFReport, Binreport which will be skipped in // bbcore_write of HBPNeuron nrn_assert(semantics); } for (int i = 0; i < szdp; ++i) { int s = semantics[i]; switch (s) { case -1: // area transform_int_data( nt._actual_area - nt._data, cnt, pdata, i, szdp, layout, nt.end); break; case -9: // diam transform_int_data( nt._actual_diam - nt._data, cnt, pdata, i, szdp, layout, nt.end); break; case -5: // pointer assumes a pointer to membrane voltage // or mechanism data in this thread. The value of the // pointer on the NEURON side was analyzed by // nrn_dblpntr2nrncore which returned the // mechanism index and type. At this moment the index // is in pdata and the type is in tmls[type].pointer2type. // However the latter order is according to the nested // iteration for nodecount { for szdp {}} // Also the nodecount POINTER instances of mechanism // might possibly point to differnt range variables. // Therefore it is not possible to use transform_int_data // and the transform must be done one at a time. 
// So we do nothing here and separately iterate // after this loop instead of the former voltage only /** transform_int_data( nt._actual_v - nt._data, cnt, pdata, i, szdp, layout, nt.end); **/ break; default: if (s >= 0 && s < 1000) { // ion int etype = s; /* if ion is SoA, must recalculate pdata values */ /* if ion is AoS, have to deal with offset */ Memb_list* eml = nt._ml_list[etype]; int edata0 = eml->data - nt._data; int ecnt = eml->nodecount; int esz = corenrn.get_prop_param_size()[etype]; for (int iml = 0; iml < cnt; ++iml) { int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); int ix = *pd; // relative to the ion data nrn_assert((ix >= 0) && (ix < ecnt * esz)); /* Original pd order assumed ecnt groups of esz */ *pd = edata0 + nrn_param_layout(ix, etype, eml); } } } } // Handle case -5 POINTER transformation (see comment above) auto search = type2itml.find(type); if (search != type2itml.end()) { auto& ptypes = tmls[type2itml[type]].pointer2type; assert(ptypes.size()); size_t iptype = 0; for (int iml = 0; iml < cnt; ++iml) { for (int i = 0; i < szdp; ++i) { if (semantics[i] == -5) { // POINTER int* pd = pdata + nrn_i_layout(iml, cnt, i, szdp, layout); int ix = *pd; // relative to elem0 int ptype = ptypes[iptype++]; if (ptype == voltage) { nrn_assert((ix >= 0) && (ix < nt.end)); int elem0 = nt._actual_v - nt._data; *pd = elem0 + ix; } else { Memb_list* pml = nt._ml_list[ptype]; int pcnt = pml->nodecount; int psz = corenrn.get_prop_param_size()[ptype]; nrn_assert((ix >= 0) && (ix < pcnt * psz)); int elem0 = pml->data - nt._data; *pd = elem0 + nrn_param_layout(ix, ptype, pml); } } } } ptypes.clear(); } } } } void Phase2::set_dependencies(const NrnThread& nt, const std::vector& memb_func) { /* here we setup the mechanism dependencies. if there is a mechanism dependency * then we allocate an array for tml->dependencies otherwise set it to nullptr. * In order to find out the "real" dependencies i.e. 
dependent mechanism * exist at the same compartment, we compare the nodeindices of mechanisms * returned by nrn_mech_depend. */ /* temporary array for dependencies */ int* mech_deps = (int*) ecalloc(memb_func.size(), sizeof(int)); for (auto tml = nt.tml; tml; tml = tml->next) { /* initialize to null */ tml->dependencies = nullptr; tml->ndependencies = 0; /* get dependencies from the models */ int deps_cnt = nrn_mech_depend(tml->index, mech_deps); /* if dependencies, setup dependency array */ if (deps_cnt) { /* store "real" dependencies in the vector */ std::vector actual_mech_deps; Memb_list* ml = tml->ml; int* nodeindices = ml->nodeindices; /* iterate over dependencies */ for (int j = 0; j < deps_cnt; j++) { /* memb_list of dependency mechanism */ Memb_list* dml = nt._ml_list[mech_deps[j]]; /* dependency mechanism may not exist in the model */ if (!dml) continue; /* take nodeindices for comparison */ int* dnodeindices = dml->nodeindices; /* set_intersection function needs temp vector to push the common values */ std::vector node_intersection; /* make sure they have non-zero nodes and find their intersection */ if ((ml->nodecount > 0) && (dml->nodecount > 0)) { std::set_intersection(nodeindices, nodeindices + ml->nodecount, dnodeindices, dnodeindices + dml->nodecount, std::back_inserter(node_intersection)); } /* if they intersect in the nodeindices, it's real dependency */ if (!node_intersection.empty()) { actual_mech_deps.push_back(mech_deps[j]); } } /* copy actual_mech_deps to dependencies */ if (!actual_mech_deps.empty()) { tml->ndependencies = actual_mech_deps.size(); tml->dependencies = (int*) ecalloc(actual_mech_deps.size(), sizeof(int)); std::copy(actual_mech_deps.begin(), actual_mech_deps.end(), tml->dependencies); } } } /* free temp dependency array */ free(mech_deps); } void Phase2::handle_weights(NrnThread& nt, int n_netcon, NrnThreadChkpnt& ntc) { nt.n_weight = weights.size(); // weights in netcons order in groups defined by Point_process target type. 
nt.weights = (double*) ecalloc_align(nt.n_weight, sizeof(double)); std::copy(weights.begin(), weights.end(), nt.weights); int iw = 0; for (int i = 0; i < n_netcon; ++i) { NetCon& nc = nt.netcons[i]; nc.u.weight_index_ = iw; if (pnttype[i] != 0) { iw += corenrn.get_pnt_receive_size()[pnttype[i]]; } else { iw += 1; } } assert(iw == nt.n_weight); // Nontrivial if FOR_NETCON in use by some mechanisms setup_fornetcon_info(nt); #if CHKPNTDEBUG ntc.delay = new double[n_netcon]; memcpy(ntc.delay, delay.data(), n_netcon * sizeof(double)); #endif for (int i = 0; i < n_netcon; ++i) { NetCon& nc = nt.netcons[i]; nc.delay_ = delay[i]; } } void Phase2::get_info_from_bbcore(NrnThread& nt, const std::vector& memb_func, NrnThreadChkpnt& ntc) { // BBCOREPOINTER information #if CHKPNTDEBUG ntc.nbcp = num_point_process; ntc.bcpicnt = new int[n_mech]; ntc.bcpdcnt = new int[n_mech]; ntc.bcptype = new int[n_mech]; size_t point_proc_id = 0; #endif for (int i = 0; i < n_mech; ++i) { int type = mech_types[i]; if (!corenrn.get_bbcore_read()[type]) { continue; } type = tmls[i].type; // This is not an error, but it has to be fixed I think #if CHKPNTDEBUG ntc.bcptype[point_proc_id] = type; ntc.bcpicnt[point_proc_id] = tmls[i].iArray.size(); ntc.bcpdcnt[point_proc_id] = tmls[i].dArray.size(); point_proc_id++; #endif int ik = 0; int dk = 0; Memb_list* ml = nt._ml_list[type]; int dsz = corenrn.get_prop_param_size()[type]; int pdsz = corenrn.get_prop_dparam_size()[type]; int cntml = ml->nodecount; int layout = corenrn.get_mech_data_layout()[type]; for (int j = 0; j < cntml; ++j) { int jp = j; if (ml->_permute) { jp = ml->_permute[j]; } double* d = ml->data; Datum* pd = ml->pdata; d += nrn_i_layout(jp, cntml, 0, dsz, layout); pd += nrn_i_layout(jp, cntml, 0, pdsz, layout); int aln_cntml = nrn_soa_padded_size(cntml, layout); (*corenrn.get_bbcore_read()[type])(tmls[i].dArray.data(), tmls[i].iArray.data(), &dk, &ik, 0, aln_cntml, d, pd, ml->_thread, &nt, ml, 0.0); } assert(dk == 
/**
 * Build the NrnThread `nt` from the data held in this Phase2 object.
 *
 * In order, this function:
 *   1. copies the cell/node counts and creates one NrnThreadMembList per mechanism
 *      (create_tml), chaining them into nt.tml and indexing them in nt._ml_list;
 *   2. allocates _shadow_rhs/_shadow_d, _idata and _vdata when their sizes are non-zero;
 *   3. carves nt._data into the matrix arrays (rhs, d, a, b, v, area, optional diam)
 *      followed by one data block per mechanism;
 *   4. fills nodeindices/pdata of every Memb_list, applies the layout transform and
 *      creates the Point_process entries;
 *   5. relocates pdata (pdata_relocation) and, if requested, applies the node permutation;
 *   6. sets dependencies, BEFORE/AFTER lists and the WATCH type list;
 *   7. wires PreSyn and NetCon instances, then weights, bbcore data, VecPlay and
 *      (when present) the saved event queue.
 *
 * @param nt          thread to populate
 * @param userParams  supplies gidgroups (file id) and checkPoints (event restore)
 */
void Phase2::populate(NrnThread& nt, const UserParams& userParams) {
    NrnThreadChkpnt& ntc = nrnthread_chkpnt[nt.id];
    ntc.file_id = userParams.gidgroups[nt.id];

    nt.ncell = n_real_cell;
    nt.end = n_node;
    nt.n_real_output = n_real_output;

#if CHKPNTDEBUG
    ntc.n_outputgids = n_output;
    ntc.nmech = n_mech;
#endif

    /// Checkpoint in coreneuron is defined for both phase 1 and phase 2 since they are written
    /// together
    // one slot per registered mechanism type; slots of absent types stay as allocated
    nt._ml_list = (Memb_list**) ecalloc_align(corenrn.get_memb_funcs().size(), sizeof(Memb_list*));

    auto& memb_func = corenrn.get_memb_funcs();
#if CHKPNTDEBUG
    ntc.mlmap = new Memb_list_chkpnt*[memb_func.size()];
    for (int i = 0; i < memb_func.size(); ++i) {
        ntc.mlmap[i] = nullptr;
    }
#endif

    nt.stream_id = 0;
    nt.compute_gpu = 0;
    auto& nrn_prop_param_size_ = corenrn.get_prop_param_size();
    auto& nrn_prop_dparam_size_ = corenrn.get_prop_dparam_size();

/* read_phase2 is being called from openmp region
 * and hence we can set the stream equal to current thread id.
 * In fact we could set gid as stream_id when we will have nrn threads
 * greater than number of omp threads.
 */
#if defined(_OPENMP)
    nt.stream_id = omp_get_thread_num();
#endif

    int shadow_rhs_cnt = 0;
    nt.shadow_rhs_cnt = 0;

    // Create the mechanism list; tml_last tracks the tail so new entries are appended.
    NrnThreadMembList* tml_last = nullptr;
    for (int i = 0; i < n_mech; ++i) {
        auto tml =
            create_tml(nt, i, memb_func[mech_types[i]], shadow_rhs_cnt, mech_types, nodecounts);

        nt._ml_list[tml->index] = tml->ml;

#if CHKPNTDEBUG
        Memb_list_chkpnt* mlc = new Memb_list_chkpnt;
        ntc.mlmap[tml->index] = mlc;
#endif

        if (nt.tml) {
            tml_last->next = tml;
        } else {
            nt.tml = tml;
        }
        tml_last = tml;
    }

    // shadow_rhs_cnt is passed to create_tml above; shadow arrays are only needed if it is non-zero
    if (shadow_rhs_cnt) {
        nt._shadow_rhs = (double*) ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0),
                                                 sizeof(double));
        nt._shadow_d = (double*) ecalloc_align(nrn_soa_padded_size(shadow_rhs_cnt, 0),
                                               sizeof(double));
        nt.shadow_rhs_cnt = shadow_rhs_cnt;
    }

    nt.mapping = nullptr;  // section segment mapping

    nt._nidata = n_idata;
    if (nt._nidata)
        nt._idata = (int*) ecalloc(nt._nidata, sizeof(int));
    else
        nt._idata = nullptr;
    // see patternstim.cpp
    // only the first thread (nrn_threads[0]) gets the extra vdata slots
    int extra_nv = (&nt == nrn_threads) ? nrn_extra_thread0_vdata : 0;
    nt._nvdata = n_vdata;
    if (nt._nvdata + extra_nv)
        nt._vdata = (void**) ecalloc_align(nt._nvdata + extra_nv, sizeof(void*));
    else
        nt._vdata = nullptr;

    // The data format begins with the matrix data
    // Each matrix array occupies n_data_padded doubles inside nt._data.
    int n_data_padded = nrn_soa_padded_size(nt.end, SOA_LAYOUT);
    nt._data = _data;
    nt._actual_rhs = nt._data + 0 * n_data_padded;
    nt._actual_d = nt._data + 1 * n_data_padded;
    nt._actual_a = nt._data + 2 * n_data_padded;
    nt._actual_b = nt._data + 3 * n_data_padded;
    nt._actual_v = nt._data + 4 * n_data_padded;
    nt._actual_area = nt._data + 5 * n_data_padded;
    nt._actual_diam = n_diam ? nt._data + 6 * n_data_padded : nullptr;

    size_t offset = 6 * n_data_padded;
    if (n_diam) {
        // in the rare case that a mechanism has dparam with diam semantics
        // then actual_diam array added after matrix in nt._data
        // Generally wasteful since only a few diam are pointed to.
        // Probably better to move the diam semantics to the p array of the mechanism
        offset += n_data_padded;
    }

    // Memb_list.data points into the nt._data array.
    // Also count the number of Point_process
    // NOTE(review): this local has the same name as the member Phase2::num_point_process
    // and hides it inside this function; the member is not assigned here.
    int num_point_process = 0;
    for (auto tml = nt.tml; tml; tml = tml->next) {
        Memb_list* ml = tml->ml;
        int type = tml->index;
        int layout = corenrn.get_mech_data_layout()[type];
        int n = ml->nodecount;
        int sz = nrn_prop_param_size_[type];
        offset = nrn_soa_byte_align(offset);
        ml->data = nt._data + offset;
        offset += nrn_soa_padded_size(n, layout) * sz;
        if (corenrn.get_pnt_map()[type] > 0) {
            num_point_process += n;
        }
    }
    nt.pntprocs = (Point_process*) ecalloc_align(num_point_process,
                                                 sizeof(Point_process));  // includes acell with and
                                                                          // without gid
    nt.n_pntproc = num_point_process;
    nt._ndata = offset;

    // matrix info
    nt._v_parent_index = v_parent_index;

#if CHKPNTDEBUG
    ntc.parent = new int[nt.end];
    memcpy(ntc.parent, nt._v_parent_index, nt.end * sizeof(int));
    ntc.area = new double[nt.end];
    memcpy(ntc.area, nt._actual_area, nt.end * sizeof(double));
#endif

    int synoffset = 0;
    // NOTE(review): the element type is not visible in this view; presumably int,
    // since int offsets (synoffset) are stored below - confirm.
    std::vector pnt_offset(memb_func.size());

    // All the mechanism data and pdata.
    // Also fill in the pnt_offset
    // Complete spec of Point_process except for the acell presyn_ field.
    // itml walks tmls in step with the nt.tml list.
    int itml = 0;
    for (auto tml = nt.tml; tml; tml = tml->next, ++itml) {
        int type = tml->index;
        Memb_list* ml = tml->ml;
        int n = ml->nodecount;
        int szp = nrn_prop_param_size_[type];
        int szdp = nrn_prop_dparam_size_[type];
        int layout = corenrn.get_mech_data_layout()[type];

        ml->nodeindices = (int*) ecalloc_align(ml->nodecount, sizeof(int));
        std::copy(tmls[itml].nodeindices.begin(), tmls[itml].nodeindices.end(), ml->nodeindices);

        mech_data_layout_transform(ml->data, n, szp, layout);

        if (szdp) {
            ml->pdata = (int*) ecalloc_align(nrn_soa_padded_size(n, layout) * szdp, sizeof(int));
            std::copy(tmls[itml].pdata.begin(), tmls[itml].pdata.end(), ml->pdata);
            mech_data_layout_transform(ml->pdata, n, szdp, layout);

#if CHKPNTDEBUG
            // Not substantive. Only for debugging.
            // Keeps an unpadded, instance-major (i * szdp + j) copy of pdata.
            Memb_list_chkpnt* mlc = ntc.mlmap[type];
            mlc->pdata_not_permuted = (int*) coreneuron::ecalloc_align(n * szdp, sizeof(int));
            if (layout == Layout::AoS) {  // only copy
                for (int i = 0; i < n; ++i) {
                    for (int j = 0; j < szdp; ++j) {
                        mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i * szdp + j];
                    }
                }
            } else if (layout == Layout::SoA) {  // transpose and unpad
                int align_cnt = nrn_soa_padded_size(n, layout);
                for (int i = 0; i < n; ++i) {
                    for (int j = 0; j < szdp; ++j) {
                        mlc->pdata_not_permuted[i * szdp + j] = ml->pdata[i + j * align_cnt];
                    }
                }
            }
#endif
        } else {
            ml->pdata = nullptr;
        }
        if (corenrn.get_pnt_map()[type] > 0) {  // POINT_PROCESS mechanism including acell
            int cnt = ml->nodecount;
            Point_process* pnt = nullptr;
            pnt = nt.pntprocs + synoffset;
            pnt_offset[type] = synoffset;
            synoffset += cnt;
            for (int i = 0; i < cnt; ++i) {
                Point_process* pp = pnt + i;
                pp->_type = type;
                pp->_i_instance = i;
                // dparam slot 1 of the instance holds the _vdata index that receives
                // the Point_process pointer
                nt._vdata[ml->pdata[nrn_i_layout(i, cnt, 1, szdp, layout)]] = pp;
                pp->_tid = nt.id;
            }
        }
    }

    // pnt_offset needed for SelfEvent transfer from NEURON. Not needed on GPU.
    // Ugh. Related but not same as NetReceiveBuffer._pnt_offset
    nt._pnt_offset = pnt_offset;

    pdata_relocation(nt, memb_func);

    /* if desired, apply the node permutation. This involves permuting
       at least the node parameter arrays for a, b, and area (and diam) and all
       integer vector values that index into nodes. This could have been done
       when originally filling the arrays with AoS ordered data, but can also
       be done now, after the SoA transformation. The latter has the advantage
       that the present order is consistent with all the layout values. Note
       that after this portion of the permutation, a number of other node index
       vectors will be read and will need to be permuted as well in subsequent
       sections of this function.
    */
    if (interleave_permute_type) {
        nt._permute = interleave_order(nt.id, nt.ncell, nt.end, nt._v_parent_index);
    }
    if (nt._permute) {
        int* p = nt._permute;
        permute_data(nt._actual_a, nt.end, p);
        permute_data(nt._actual_b, nt.end, p);
        permute_data(nt._actual_area, nt.end, p);
        permute_data(nt._actual_v,
                     nt.end,
                     p);  // need if restore or finitialize does not initialize voltage
        if (nt._actual_diam) {
            permute_data(nt._actual_diam, nt.end, p);
        }
        // index values change as well as ordering
        permute_ptr(nt._v_parent_index, nt.end, p);
        node_permute(nt._v_parent_index, nt.end, p);

#if CORENRN_DEBUG
        for (int i = 0; i < nt.end; ++i) {
            printf("parent[%d] = %d\n", i, nt._v_parent_index[i]);
        }
#endif

        // specify the ml->_permute and sort the nodeindices
        // Have to calculate all the permute before updating pdata in case
        // POINTER to data of other mechanisms exist.
        for (auto tml = nt.tml; tml; tml = tml->next) {
            if (tml->ml->nodeindices) {  // not artificial
                permute_nodeindices(tml->ml, p);
            }
        }
        for (auto tml = nt.tml; tml; tml = tml->next) {
            if (tml->ml->nodeindices) {  // not artificial
                permute_ml(tml->ml, tml->index, nt);
            }
        }

        // permute the Point_process._i_instance
        for (int i = 0; i < nt.n_pntproc; ++i) {
            Point_process& pp = nt.pntprocs[i];
            Memb_list* ml = nt._ml_list[pp._type];
            if (ml->_permute) {
                pp._i_instance = ml->_permute[pp._i_instance];
            }
        }
    }

    set_dependencies(nt, memb_func);

    fill_before_after_lists(nt, memb_func);

    // for fast watch statement checking
    // setup a list of types that have WATCH statement
    {
        int sz = 0;  // count the types with WATCH
        for (auto tml = nt.tml; tml; tml = tml->next) {
            if (corenrn.get_watch_check()[tml->index]) {
                ++sz;
            }
        }
        if (sz) {
            // one extra slot is allocated beyond the sz entries written below
            nt._watch_types = (int*) ecalloc(sz + 1, sizeof(int));  // nullptr terminated
            sz = 0;
            for (auto tml = nt.tml; tml; tml = tml->next) {
                if (corenrn.get_watch_check()[tml->index]) {
                    nt._watch_types[sz++] = tml->index;
                }
            }
        }
    }
    auto& pnttype2presyn = corenrn.get_pnttype2presyn();
    auto& nrn_has_net_event_ = corenrn.get_has_net_event();
    // create the nt.pnt2presyn_ix array of arrays.
    nt.pnt2presyn_ix = (int**) ecalloc(nrn_has_net_event_.size(), sizeof(int*));
    for (size_t i = 0; i < nrn_has_net_event_.size(); ++i) {
        Memb_list* ml = nt._ml_list[nrn_has_net_event_[i]];
        if (ml && ml->nodecount > 0) {
            nt.pnt2presyn_ix[i] = (int*) ecalloc(ml->nodecount, sizeof(int));
        }
    }

    // Real cells are at the beginning of the nt.presyns followed by
    // acells (with and without gids mixed together)
    // Here we associate the real cells with voltage pointers and
    // acell PreSyn with the Point_process.
    // nt.presyns order same as output_vindex order
#if CHKPNTDEBUG
    ntc.output_vindex = new int[nt.n_presyn];
    memcpy(ntc.output_vindex, output_vindex.data(), nt.n_presyn * sizeof(int));
#endif
    if (nt._permute) {
        // only indices >= 0 (i.e. _actual_v indices) will be changed.
        node_permute(output_vindex.data(), nt.n_presyn, nt._permute);
    }
#if CHKPNTDEBUG
    ntc.output_threshold = new double[n_real_output];
    memcpy(ntc.output_threshold, output_threshold.data(), n_real_output * sizeof(double));
#endif
    for (int i = 0; i < nt.n_presyn; ++i) {  // real cells
        PreSyn* ps = nt.presyns + i;

        int ix = output_vindex[i];
        if (ix == -1 && i < n_real_output) {  // real cell without a presyn
            continue;
        }
        if (ix < 0) {
            // a negative entry encodes a Point_process source as -(index * 1000 + type)
            ix = -ix;
            int index = ix / 1000;
            int type = ix % 1000;
            Point_process* pnt = nt.pntprocs + (pnt_offset[type] + index);
            ps->pntsrc_ = pnt;
            // pnt->_presyn = ps;
            int ip2ps = pnttype2presyn[pnt->_type];
            if (ip2ps >= 0) {
                nt.pnt2presyn_ix[ip2ps][pnt->_i_instance] = i;
            }
            if (ps->gid_ < 0) {
                ps->gid_ = -1;
            }
        } else {
            assert(ps->gid_ > -1);
            ps->thvar_index_ = ix;  // index into _actual_v
            assert(ix < nt.end);
            ps->threshold_ = output_threshold[i];
        }
    }

    // initial net_send_buffer size about 1% of number of presyns
    // nt._net_send_buffer_size = nt.ncell/100 + 1;
    // but, to avoid reallocation complexity on GPU ...
    nt._net_send_buffer_size = n_real_output;
    nt._net_send_buffer = (int*) ecalloc_align(nt._net_send_buffer_size, sizeof(int));

    int nnetcon = nt.n_netcon;

    // it may happen that Point_process structures will be made unnecessary
    // by factoring into NetCon.

#if CHKPNTDEBUG
    ntc.pnttype = new int[nnetcon];
    ntc.pntindex = new int[nnetcon];
    memcpy(ntc.pnttype, pnttype.data(), nnetcon * sizeof(int));
    memcpy(ntc.pntindex, pntindex.data(), nnetcon * sizeof(int));
#endif
    // Attach every NetCon with a positive target type to its Point_process.
    for (int i = 0; i < nnetcon; ++i) {
        int type = pnttype[i];
        if (type > 0) {
            int index = pnt_offset[type] + pntindex[i];  /// Potentially uninitialized pnt_offset[],
                                                         /// check for previous assignments
            NetCon& nc = nt.netcons[i];
            nc.target_ = nt.pntprocs + index;
            nc.active_ = true;
        }
    }

    handle_weights(nt, nnetcon, ntc);

    get_info_from_bbcore(nt, memb_func, ntc);

    set_vec_play(nt, ntc);

    // events is only non-empty when restore_events() stored saved queue entries
    if (!events.empty()) {
        userParams.checkPoints.restore_tqueue(nt, *this);
    }

    set_net_send_buffer(nt._ml_list, pnt_offset);
}
# ============================================================================= */ #pragma once #include "coreneuron/io/nrn_filehandler.hpp" #include "coreneuron/io/user_params.hpp" #include "coreneuron/utils/ivocvect.hpp" #include namespace coreneuron { struct NrnThread; struct NrnThreadMembList; struct Memb_func; struct Memb_list; struct NrnThreadChkpnt; class Phase2 { public: void read_file(FileHandler& F, const NrnThread& nt); void read_direct(int thread_id, const NrnThread& nt); void populate(NrnThread& nt, const UserParams& userParams); std::vector preSynConditionEventFlags; // All of this is public for nrn_checkpoint struct EventTypeBase { double time; }; struct NetConType_: public EventTypeBase { int netcon_index; }; struct SelfEventType_: public EventTypeBase { int target_type; int point_proc_instance; int target_instance; double flag; int movable; int weight_index; }; struct PreSynType_: public EventTypeBase { int presyn_index; }; struct NetParEvent_: public EventTypeBase {}; struct PlayRecordEventType_: public EventTypeBase { int play_record_type; int vecplay_index; }; struct VecPlayContinuous_ { int vtype; int mtype; int ix; IvocVect yvec; IvocVect tvec; int last_index; int discon_index; int ubound_index; }; std::vector vec_play_continuous; int patstim_index; std::vector>> events; private: void check_mechanism(); void transform_int_data(int elem0, int nodecount, int* pdata, int i, int dparam_size, int layout, int n_node_); void set_net_send_buffer(Memb_list** ml_list, const std::vector& pnt_offset); void restore_events(FileHandler& F); void fill_before_after_lists(NrnThread& nt, const std::vector& memb_func); void pdata_relocation(const NrnThread& nt, const std::vector& memb_func); void set_dependencies(const NrnThread& nt, const std::vector& memb_func); void handle_weights(NrnThread& nt, int n_netcon, NrnThreadChkpnt& ntc); void get_info_from_bbcore(NrnThread& nt, const std::vector& memb_func, NrnThreadChkpnt& ntc); void set_vec_play(NrnThread& nt, 
NrnThreadChkpnt& ntc); int n_real_cell; int n_output; int n_real_output; int n_node; int n_diam; // 0 if not needed, else n_node int n_mech; std::vector mech_types; std::vector nodecounts; int n_idata; int n_vdata; int* v_parent_index; /* TO DO: when this is fixed use it like that std::vector actual_a; std::vector actual_b; std::vector actual_area; std::vector actual_v; std::vector actual_diam; */ double* _data; struct TML { std::vector nodeindices; std::vector pdata; int type; std::vector iArray; std::vector dArray; std::vector pointer2type; }; std::vector tmls; std::vector output_vindex; std::vector output_threshold; std::vector pnttype; std::vector pntindex; std::vector weights; std::vector delay; int num_point_process; }; } // namespace coreneuron ================================================ FILE: coreneuron/io/prcellstate.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/nrn_setup.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #define precision 15 namespace coreneuron { static std::map pnt2index; // for deciding if NetCon is to be printed static int pntindex; // running count of printed point processes. static std::map map_nc2src; static std::vector* inv_permute_; static int permute(int i, NrnThread& nt) { return nt._permute ? 
nt._permute[i] : i; } static int inv_permute(int i, NrnThread& nt) { nrn_assert(i >= 0 && i < nt.end); if (!nt._permute) { return i; } if (!inv_permute_) { inv_permute_ = new std::vector(nt.end); for (int i = 0; i < nt.end; ++i) { (*inv_permute_)[nt._permute[i]] = i; } } return (*inv_permute_)[i]; } static int ml_permute(int i, Memb_list* ml) { return ml->_permute ? ml->_permute[i] : i; } // Note: cellnodes array is in unpermuted order. static void pr_memb(int type, Memb_list* ml, int* cellnodes, NrnThread& nt, FILE* f) { if (corenrn.get_is_artificial()[type]) return; bool header_printed = false; int size = corenrn.get_prop_param_size()[type]; int psize = corenrn.get_prop_dparam_size()[type]; bool receives_events = corenrn.get_pnt_receive()[type]; int layout = corenrn.get_mech_data_layout()[type]; int cnt = ml->nodecount; for (int iorig = 0; iorig < ml->nodecount; ++iorig) { // original index int i = ml_permute(iorig, ml); // present index int inode = ml->nodeindices[i]; // inode is the permuted node int cix = cellnodes[inv_permute(inode, nt)]; // original index relative to this cell if (cix >= 0) { if (!header_printed) { header_printed = true; fprintf(f, "type=%d %s size=%d\n", type, corenrn.get_memb_func(type).sym, size); } if (receives_events) { fprintf(f, "%d nri %d\n", cix, pntindex); int k = nrn_i_layout(i, cnt, 1, psize, layout); Point_process* pp = (Point_process*) nt._vdata[ml->pdata[k]]; pnt2index[pp] = pntindex; ++pntindex; } for (int j = 0; j < size; ++j) { int k = nrn_i_layout(i, cnt, j, size, layout); fprintf(f, " %d %d %.*g\n", cix, j, precision, ml->data[k]); } } } } static void pr_netcon(NrnThread& nt, FILE* f) { if (pntindex == 0) { return; } // pnt2index table has been filled // List of NetCon for each of the NET_RECEIVE point process instances // Also create the initial map of NetCon <-> DiscreteEvent (PreSyn) std::vector> nclist(pntindex); map_nc2src.clear(); int nc_cnt = 0; for (int i = 0; i < nt.n_netcon; ++i) { NetCon* nc = nt.netcons + i; 
Point_process* pp = nc->target_; std::map::iterator it = pnt2index.find(pp); if (it != pnt2index.end()) { nclist[it->second].push_back(nc); map_nc2src[nc] = nullptr; ++nc_cnt; } } fprintf(f, "netcons %d\n", nc_cnt); fprintf(f, " pntindex srcgid active delay weights\n"); /// Fill the NetCon <-> DiscreteEvent map with PreSyn-s // presyns can come from any thread for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& ntps = nrn_threads[ith]; for (int i = 0; i < ntps.n_presyn; ++i) { PreSyn* ps = ntps.presyns + i; for (int j = 0; j < ps->nc_cnt_; ++j) { NetCon* nc = netcon_in_presyn_order_[ps->nc_index_ + j]; auto it_nc2src = map_nc2src.find(nc); if (it_nc2src != map_nc2src.end()) { it_nc2src->second = ps; } } } } /// Fill the NetCon <-> DiscreteEvent map with InputPreSyn-s /// Traverse gid <-> InputPreSyn map and loop over NetCon-s of the /// correspondent InputPreSyn. If NetCon is in the nc2src map, /// remember its ips and the gid std::map map_nc2gid; for (const auto& gid: gid2in) { InputPreSyn* ips = gid.second; /// input presyn for (int i = 0; i < ips->nc_cnt_; ++i) { NetCon* nc = netcon_in_presyn_order_[ips->nc_index_ + i]; auto it_nc2src = map_nc2src.find(nc); if (it_nc2src != map_nc2src.end()) { it_nc2src->second = ips; map_nc2gid[nc] = gid.first; /// src gid of the input presyn } } } for (int i = 0; i < pntindex; ++i) { for (int j = 0; j < (int) (nclist[i].size()); ++j) { NetCon* nc = nclist[i][j]; int srcgid = -3; auto it_nc2src = map_nc2src.find(nc); if (it_nc2src != map_nc2src.end()) { // seems like there should be no NetCon which is // not in the map DiscreteEvent* de = it_nc2src->second; if (de && de->type() == PreSynType) { PreSyn* ps = (PreSyn*) de; srcgid = ps->gid_; Point_process* pnt = ps->pntsrc_; if (srcgid < 0 && pnt) { int type = pnt->_type; fprintf(f, "%d %s %d %.*g", i, corenrn.get_memb_func(type).sym, nc->active_ ? 
1 : 0, precision, nc->delay_); } else if (srcgid < 0 && ps->thvar_index_ > 0) { fprintf( f, "%d %s %d %.*g", i, "v", nc->active_ ? 1 : 0, precision, nc->delay_); } else { fprintf(f, "%d %d %d %.*g", i, srcgid, nc->active_ ? 1 : 0, precision, nc->delay_); } } else { fprintf(f, "%d %d %d %.*g", i, map_nc2gid[nc], nc->active_ ? 1 : 0, precision, nc->delay_); } } else { fprintf(f, "%d %d %d %.*g", i, srcgid, nc->active_ ? 1 : 0, precision, nc->delay_); } int wcnt = corenrn.get_pnt_receive_size()[nc->target_->_type]; for (int k = 0; k < wcnt; ++k) { fprintf(f, " %.*g", precision, nt.weights[nc->u.weight_index_ + k]); } fprintf(f, "\n"); } } // cleanup nclist.clear(); } static void pr_realcell(PreSyn& ps, NrnThread& nt, FILE* f) { // for associating NetCons with Point_process identifiers pntindex = 0; // threshold variable is a voltage printf("thvar_index_=%d end=%d\n", inv_permute(ps.thvar_index_, nt), nt.end); if (ps.thvar_index_ < 0 || ps.thvar_index_ >= nt.end) { hoc_execerror("gid not associated with a voltage", 0); } int inode = ps.thvar_index_; // and the root node is ... 
int rnode = inode; while (rnode >= nt.ncell) { rnode = nt._v_parent_index[rnode]; } // count the number of nodes in the cell // do not assume all cell nodes except the root are contiguous // cellnodes is an unpermuted vector int* cellnodes = new int[nt.end]; for (int i = 0; i < nt.end; ++i) { cellnodes[i] = -1; } int cnt = 0; cellnodes[inv_permute(rnode, nt)] = cnt++; for (int i = nt.ncell; i < nt.end; ++i) { // think of it as unpermuted order if (cellnodes[inv_permute(nt._v_parent_index[permute(i, nt)], nt)] >= 0) { cellnodes[i] = cnt++; } } fprintf(f, "%d nodes %d is the threshold node\n", cnt, cellnodes[inv_permute(inode, nt)] - 1); fprintf(f, " threshold %.*g\n", precision, ps.threshold_); fprintf(f, "inode parent area a b\n"); for (int iorig = 0; iorig < nt.end; ++iorig) if (cellnodes[iorig] >= 0) { int i = permute(iorig, nt); int ip = nt._v_parent_index[i]; fprintf(f, "%d %d %.*g %.*g %.*g\n", cellnodes[iorig], ip >= 0 ? cellnodes[inv_permute(ip, nt)] : -1, precision, nt._actual_area[i], precision, nt._actual_a[i], precision, nt._actual_b[i]); } fprintf(f, "inode v\n"); for (int i = 0; i < nt.end; ++i) if (cellnodes[i] >= 0) { fprintf(f, "%d %.*g\n", cellnodes[i], precision, nt._actual_v[permute(i, nt)]); } // each mechanism for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) { pr_memb(tml->index, tml->ml, cellnodes, nt, f); } // the NetCon info (uses pnt2index) pr_netcon(nt, f); delete[] cellnodes; pnt2index.clear(); if (inv_permute_) { delete inv_permute_; inv_permute_ = nullptr; } } int prcellstate(int gid, const char* suffix) { // search the NrnThread.presyns for the gid for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; for (int ip = 0; ip < nt.n_presyn; ++ip) { PreSyn& ps = nt.presyns[ip]; if (ps.output_index_ == gid) { // found it so create a _.corenrn file std::string filename = std::to_string(gid) + "_" + suffix + ".corenrn"; FILE* f = fopen(filename.c_str(), "w"); assert(f); fprintf(f, "gid = %d\n", gid); 
fprintf(f, "t = %.*g\n", precision, nt._t); fprintf(f, "celsius = %.*g\n", precision, celsius); if (ps.thvar_index_ >= 0) { pr_realcell(ps, nt, f); } fclose(f); return 1; } } } return 0; } } // namespace coreneuron ================================================ FILE: coreneuron/io/prcellstate.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once namespace coreneuron { extern int prcellstate(int gid, const char* suffix); } // namespace coreneuron ================================================ FILE: coreneuron/io/reports/binary_report_handler.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include "binary_report_handler.hpp" #ifdef ENABLE_BIN_REPORTS #include "reportinglib/Records.h" #endif // ENABLE_BIN_REPORTS namespace coreneuron { void BinaryReportHandler::create_report(ReportConfiguration& config, double dt, double tstop, double delay) { #ifdef ENABLE_BIN_REPORTS records_set_atomic_step(dt); #endif // ENABLE_BIN_REPORTS ReportHandler::create_report(config, dt, tstop, delay); } #ifdef ENABLE_BIN_REPORTS static void create_soma_extra(const CellMapping& mapping, std::array& extra) { extra = {1, 0, 0, 0, 0}; /* report extra "mask" all infos not written in report: here only soma count is reported */ extra[1] = mapping.get_seclist_segment_count("soma"); } static void create_compartment_extra(const CellMapping& mapping, std::array& extra) { extra[1] = mapping.get_seclist_section_count("soma"); extra[2] = mapping.get_seclist_section_count("axon"); extra[3] = mapping.get_seclist_section_count("dend"); extra[4] = mapping.get_seclist_section_count("apic"); extra[0] = std::accumulate(extra.begin() + 1, extra.end(), 0); } static void create_custom_extra(const CellMapping& mapping, std::array& extra) { extra = {1, 0, 0, 0, 1}; extra[1] = mapping.get_seclist_section_count("soma"); // extra[2] and extra[3] extra[4] = mapping.get_seclist_section_count("apic"); extra[0] = std::accumulate(extra.begin() + 1, extra.end(), 0); } void BinaryReportHandler::register_section_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, bool is_soma_target) { create_extra_func create_extra = is_soma_target ? 
create_soma_extra : create_compartment_extra; register_report(nt, config, vars_to_report, create_extra); } void BinaryReportHandler::register_custom_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report) { create_extra_func create_extra = create_custom_extra; register_report(nt, config, vars_to_report, create_extra); } void BinaryReportHandler::register_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, create_extra_func& create_extra) { int sizemapping = 1; int extramapping = 5; std::array mapping = {0}; std::array extra; for (const auto& var: vars_to_report) { int gid = var.first; auto& vars = var.second; if (vars.empty()) { continue; } const auto* mapinfo = static_cast(nt.mapping); const CellMapping* m = mapinfo->get_cell_mapping(gid); extra[0] = vars.size(); create_extra(*m, extra); records_add_report(config.output_path.data(), gid, gid, gid, config.start, config.stop, config.report_dt, sizemapping, config.type_str.data(), extramapping, config.unit.data()); records_set_report_max_buffer_size_hint(config.output_path.data(), config.buffer_size); records_extra_mapping(config.output_path.data(), gid, 5, extra.data()); for (const auto& var: vars) { mapping[0] = var.id; records_add_var_with_mapping( config.output_path.data(), gid, var.var_value, sizemapping, mapping.data()); } } } #endif // ENABLE_BIN_REPORTS } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/binary_report_handler.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once #include #include #include #include #include "report_handler.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" namespace coreneuron { class BinaryReportHandler: public ReportHandler { public: void create_report(ReportConfiguration& config, double dt, double tstop, double delay) override; #ifdef ENABLE_BIN_REPORTS void register_section_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, bool is_soma_target) override; void register_custom_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report) override; private: using create_extra_func = std::function&)>; void register_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, create_extra_func& create_extra); #endif // ENABLE_BIN_REPORTS }; } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/nrnreport.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include #include #include #include #include "coreneuron/network/netcon.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/reports/nrnreport.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" #include "coreneuron/mechanism/mech_mapping.hpp" #include "coreneuron/mechanism/membfunc.hpp" #ifdef ENABLE_BIN_REPORTS #include "reportinglib/Records.h" #endif #ifdef ENABLE_SONATA_REPORTS #include "bbp/sonata/reports.h" #endif namespace coreneuron { // Size in MB of the report buffer static int size_report_buffer = 4; void nrn_flush_reports(double t) { // flush before buffer is full #ifdef ENABLE_BIN_REPORTS records_end_iteration(t); #endif #ifdef ENABLE_SONATA_REPORTS sonata_check_and_flush(t); #endif } /** in the current implementation, we call flush during every spike exchange * interval. Hence there should be sufficient buffer to hold all reports * for the duration of mindelay interval. In the below call we specify the * number of timesteps that we have to buffer. * TODO: revisit this because spike exchange can happen few steps before/after * mindelay interval and hence adding two extra timesteps to buffer. 
*/ void setup_report_engine(double dt_report, double mindelay) { int min_steps_to_record = static_cast(std::round(mindelay / dt_report)); static_cast(min_steps_to_record); #ifdef ENABLE_BIN_REPORTS records_set_min_steps_to_record(min_steps_to_record); records_setup_communicator(); records_finish_and_share(); #endif #ifdef ENABLE_SONATA_REPORTS sonata_set_min_steps_to_record(min_steps_to_record); sonata_setup_communicators(); sonata_prepare_datasets(); #endif } // Size in MB of the report buffers void set_report_buffer_size(int n) { size_report_buffer = n; #ifdef ENABLE_BIN_REPORTS records_set_max_buffer_size_hint(size_report_buffer); #endif #ifdef ENABLE_SONATA_REPORTS sonata_set_max_buffer_size_hint(size_report_buffer); #endif } void finalize_report() { #ifdef ENABLE_BIN_REPORTS records_flush(nrn_threads[0]._t); #endif #ifdef ENABLE_SONATA_REPORTS sonata_flush(nrn_threads[0]._t); #endif } } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/nrnreport.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ /** * @file nrnreport.h * @brief interface with reportinglib for soma reports */ #ifndef _H_NRN_REPORT_ #define _H_NRN_REPORT_ #include #include #include #include #include #define REPORT_MAX_NAME_LEN 256 #define REPORT_MAX_FILEPATH_LEN 4096 namespace coreneuron { struct SummationReport { // Contains the values of the summation with index == segment_id std::vector summation_ = {}; // Map containing the pointers of the currents and its scaling factor for every segment_id std::unordered_map>> currents_; // Map containing the list of segment_ids per gid std::unordered_map> gid_segments_; }; struct SummationReportMapping { // Map containing a SummationReport object per report std::unordered_map summation_reports_; }; struct SpikesInfo { std::string file_name = "out"; std::vector> population_info; }; // name of the variable in mod file that is used to indicate which synapse // is enabled or disable for reporting #define SELECTED_VAR_MOD_NAME "selected_for_report" /// name of the variable in mod file used for setting synapse id #define SYNAPSE_ID_MOD_NAME "synapseID" /* * Defines the type of target, as per the following syntax: * 0=Compartment, 1=Cell/Soma, Section { 2=Axon, 3=Dendrite, 4=Apical } * The "Comp" variations are compartment-based (all segments, not middle only) */ enum class TargetType { Compartment = 0, Cell = 1, SectionSoma = 2, SectionAxon = 3, SectionDendrite = 4, SectionApical = 5, SectionSomaAll = 6, SectionAxonAll = 7, SectionDendriteAll = 8, SectionApicalAll = 9, }; // enumerate that defines the type of target report requested enum ReportType { SomaReport, CompartmentReport, SynapseReport, IMembraneReport, SectionReport, SummationReport }; // enumerate that defines the section type for a Section report enum SectionType { Cell, Soma, Axon, Dendrite, Apical, All }; struct ReportConfiguration { std::string name; // name of the report std::string output_path; // full path 
of the report std::string target_name; // target of the report std::vector mech_names; // mechanism names std::vector var_names; // variable names std::vector mech_ids; // mechanisms std::string unit; // unit of the report std::string format; // format of the report (Bin, hdf5, SONATA) std::string type_str; // type of report string TargetType target_type; // type of the target ReportType type; // type of the report SectionType section_type; // type of section report bool section_all_compartments; // flag for section report (all values) double report_dt; // reporting timestep double start; // start time of report double stop; // stop time of report int num_gids; // total number of gids int buffer_size; // hint on buffer size used for this report std::vector target; // list of gids for this report }; void setup_report_engine(double dt_report, double mindelay); std::vector create_report_configurations(const std::string& filename, const std::string& output_dir, SpikesInfo& spikes_info); void finalize_report(); void nrn_flush_reports(double t); void set_report_buffer_size(int n); } // namespace coreneuron #endif //_H_NRN_REPORT_ ================================================ FILE: coreneuron/io/reports/report_configuration_parser.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include #include #include #include #include #include #include #include #include "coreneuron/io/reports/nrnreport.hpp" #include "coreneuron/mechanism/mech_mapping.hpp" #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/utils/utils.hpp" namespace coreneuron { /* * Split filter comma separated strings ("mech.var_name") into mech_name and var_name */ void parse_filter_string(const std::string& filter, ReportConfiguration& config) { std::vector mechanisms; std::stringstream ss(filter); std::string mechanism; // Multiple report variables are separated by `,` while (getline(ss, mechanism, ',')) { mechanisms.push_back(mechanism); // Split mechanism name and corresponding reporting variable std::string mech_name; std::string var_name; std::istringstream iss(mechanism); std::getline(iss, mech_name, '.'); std::getline(iss, var_name, '.'); if (var_name.empty()) { var_name = "i"; } config.mech_names.emplace_back(mech_name); config.var_names.emplace_back(var_name); if (mech_name == "i_membrane") { nrn_use_fast_imem = true; } } } void register_target_type(ReportConfiguration& report, ReportType report_type) { report.type = report_type; switch (report.target_type) { case TargetType::Compartment: report.section_type = All; report.section_all_compartments = true; break; case TargetType::Cell: report.section_type = Cell; report.section_all_compartments = false; break; case TargetType::SectionSoma: report.section_type = Soma; report.section_all_compartments = false; break; case TargetType::SectionSomaAll: report.section_type = Soma; report.section_all_compartments = true; break; case TargetType::SectionAxon: report.section_type = Axon; report.section_all_compartments = false; break; case TargetType::SectionAxonAll: report.section_type = Axon; report.section_all_compartments = true; break; case TargetType::SectionDendrite: 
report.section_type = Dendrite; report.section_all_compartments = false; break; case TargetType::SectionDendriteAll: report.section_type = Dendrite; report.section_all_compartments = true; break; case TargetType::SectionApical: report.section_type = Apical; report.section_all_compartments = false; break; case TargetType::SectionApicalAll: report.section_type = Apical; report.section_all_compartments = true; break; default: std::cerr << "Report error: unsupported target type" << std::endl; nrn_abort(1); } } std::vector create_report_configurations(const std::string& conf_file, const std::string& output_dir, SpikesInfo& spikes_info) { std::string report_on; int target; std::ifstream report_conf(conf_file); int num_reports = 0; report_conf >> num_reports; std::vector reports(num_reports); for (auto& report: reports) { report.buffer_size = 4; // default size to 4 Mb report_conf >> report.name >> report.target_name >> report.type_str >> report_on >> report.unit >> report.format >> target >> report.report_dt >> report.start >> report.stop >> report.num_gids >> report.buffer_size; report.target_type = static_cast(target); std::transform(report.type_str.begin(), report.type_str.end(), report.type_str.begin(), [](unsigned char c) { return std::tolower(c); }); report.output_path = output_dir + "/" + report.name; ReportType report_type; if (report.type_str == "compartment") { report_type = SectionReport; if (report_on == "i_membrane") { nrn_use_fast_imem = true; report_type = IMembraneReport; } } else if (report.type_str == "synapse") { report_type = SynapseReport; } else if (report.type_str == "summation") { report_type = SummationReport; } else { std::cerr << "Report error: unsupported type " << report.type_str << std::endl; nrn_abort(1); } register_target_type(report, report_type); if (report.type == SynapseReport || report.type == SummationReport) { parse_filter_string(report_on, report); } if (report.num_gids) { report.target.resize(report.num_gids); 
report_conf.ignore(std::numeric_limits::max(), '\n'); report_conf.read(reinterpret_cast(report.target.data()), report.num_gids * sizeof(int)); // extra new line: skip report_conf.ignore(std::numeric_limits::max(), '\n'); } } // read population information for spike report int num_populations; std::string spikes_population_name; int spikes_population_offset; if (report_conf.peek() == '\n') { // skip newline and move forward to spike reports report_conf.ignore(std::numeric_limits::max(), '\n'); } if (isdigit(report_conf.peek())) { report_conf >> num_populations; } else { // support old format: one single line "All" num_populations = 1; } for (int i = 0; i < num_populations; i++) { if (!(report_conf >> spikes_population_name >> spikes_population_offset)) { // support old format: one single line "All" report_conf >> spikes_population_name; spikes_population_offset = 0; } spikes_info.population_info.emplace_back( std::make_pair(spikes_population_name, spikes_population_offset)); } report_conf >> spikes_info.file_name; return reports; } } // namespace coreneuron ================================================ FILE: coreneuron/io/reports/report_event.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include "report_event.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/reports/nrnreport.hpp" #include "coreneuron/utils/nrn_assert.h" #ifdef ENABLE_BIN_REPORTS #include "reportinglib/Records.h" #endif // ENABLE_BIN_REPORTS #ifdef ENABLE_SONATA_REPORTS #include "bbp/sonata/reports.h" #endif // ENABLE_SONATA_REPORTS namespace coreneuron { #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) ReportEvent::ReportEvent(double dt, double tstart, const VarsToReport& filtered_gids, const char* name, double report_dt) : dt(dt) , tstart(tstart) , report_path(name) , report_dt(report_dt) , vars_to_report(filtered_gids) { nrn_assert(filtered_gids.size()); step = tstart / dt; reporting_period = static_cast(report_dt / dt); gids_to_report.reserve(filtered_gids.size()); for (const auto& gid: filtered_gids) { gids_to_report.push_back(gid.first); } std::sort(gids_to_report.begin(), gids_to_report.end()); } void ReportEvent::summation_alu(NrnThread* nt) { // Sum currents only on reporting steps if (step > 0 && (static_cast(step) % reporting_period) == 0) { auto& summation_report = nt->summation_report_handler_->summation_reports_[report_path]; // Add currents of all variables in each segment double sum = 0.0; for (const auto& kv: summation_report.currents_) { int segment_id = kv.first; for (const auto& value: kv.second) { double current_value = *value.first; int scale = value.second; sum += current_value * scale; } summation_report.summation_[segment_id] = sum; sum = 0.0; } // Add all currents in the soma // Only when type summation and soma target if (!summation_report.gid_segments_.empty()) { double sum_soma = 0.0; for (const auto& kv: summation_report.gid_segments_) { int gid = kv.first; for (const auto& segment_id: kv.second) { sum_soma += summation_report.summation_[segment_id]; } *(vars_to_report[gid].front().var_value) = sum_soma; sum_soma = 0.0; } } } } /** on 
deliver, call ReportingLib and setup next event */ void ReportEvent::deliver(double t, NetCvode* nc, NrnThread* nt) { /* reportinglib is not thread safe */ #pragma omp critical { summation_alu(nt); // each thread needs to know its own step #ifdef ENABLE_BIN_REPORTS records_nrec(step, gids_to_report.size(), gids_to_report.data(), report_path.data()); #endif #ifdef ENABLE_SONATA_REPORTS sonata_record_node_data(step, gids_to_report.size(), gids_to_report.data(), report_path.data()); #endif send(t + dt, nc, nt); step++; } } bool ReportEvent::require_checkpoint() { return false; } #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/report_event.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once #include #include #include #include #include "coreneuron/network/netcon.hpp" #include "coreneuron/network/netcvode.hpp" namespace coreneuron { #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) struct VarWithMapping { uint32_t id; double* var_value; VarWithMapping(int id_, double* v_) : id(id_) , var_value(v_) {} }; // mapping the set of variables pointers to report to its gid using VarsToReport = std::unordered_map>; class ReportEvent: public DiscreteEvent { public: ReportEvent(double dt, double tstart, const VarsToReport& filtered_gids, const char* name, double report_dt); /** on deliver, call ReportingLib and setup next event */ void deliver(double t, NetCvode* nc, NrnThread* nt) override; bool require_checkpoint() override; void summation_alu(NrnThread* nt); private: double dt; double step; std::string report_path; double report_dt; int reporting_period; std::vector gids_to_report; double tstart; VarsToReport vars_to_report; }; #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/report_handler.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include "report_handler.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" #include "coreneuron/mechanism/mech_mapping.hpp" #include "coreneuron/utils/utils.hpp" namespace coreneuron { template std::vector intersection_gids(const NrnThread& nt, std::vector& target_gids) { std::vector thread_gids; for (int i = 0; i < nt.ncell; i++) { thread_gids.push_back(nt.presyns[i].gid_); } std::vector intersection; std::sort(thread_gids.begin(), thread_gids.end()); std::sort(target_gids.begin(), target_gids.end()); std::set_intersection(thread_gids.begin(), thread_gids.end(), target_gids.begin(), target_gids.end(), back_inserter(intersection)); return intersection; } void ReportHandler::create_report(ReportConfiguration& report_config, double dt, double tstop, double delay) { #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) if (report_config.start < t) { report_config.start = t; } report_config.stop = std::min(report_config.stop, tstop); for (const auto& mech: report_config.mech_names) { report_config.mech_ids.emplace_back(nrn_get_mechtype(mech.data())); } if (report_config.type == SynapseReport && report_config.mech_ids.empty()) { std::cerr << "[ERROR] mechanism to report: " << report_config.mech_names[0] << " is not mapped in this simulation, cannot report on it \n"; nrn_abort(1); } for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; double* report_variable = nt._actual_v; if (!nt.ncell) { continue; } const std::vector& nodes_to_gid = map_gids(nt); const std::vector gids_to_report = intersection_gids(nt, report_config.target); VarsToReport vars_to_report; bool is_soma_target; switch (report_config.type) { case IMembraneReport: report_variable = nt.nrn_fast_imem->nrn_sav_rhs; case SectionReport: vars_to_report = get_section_vars_to_report(nt, gids_to_report, report_variable, report_config.section_type, report_config.section_all_compartments); 
is_soma_target = report_config.section_type == SectionType::Soma || report_config.section_type == SectionType::Cell; register_section_report(nt, report_config, vars_to_report, is_soma_target); break; case SummationReport: vars_to_report = get_summation_vars_to_report(nt, gids_to_report, report_config, nodes_to_gid); register_custom_report(nt, report_config, vars_to_report); break; default: vars_to_report = get_synapse_vars_to_report(nt, gids_to_report, report_config, nodes_to_gid); register_custom_report(nt, report_config, vars_to_report); } if (!vars_to_report.empty()) { auto report_event = std::make_unique( dt, t, vars_to_report, report_config.output_path.data(), report_config.report_dt); report_event->send(t, net_cvode_instance, &nt); m_report_events.push_back(std::move(report_event)); } } #else if (nrnmpi_myid == 0) { std::cerr << "[WARNING] : Reporting is disabled. Please recompile with either libsonata or " "reportinglib. \n"; } #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) } #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) void ReportHandler::register_section_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, bool is_soma_target) { if (nrnmpi_myid == 0) { std::cerr << "[WARNING] : Format '" << config.format << "' in report '" << config.output_path << "' not supported.\n"; } } void ReportHandler::register_custom_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report) { if (nrnmpi_myid == 0) { std::cerr << "[WARNING] : Format '" << config.format << "' in report '" << config.output_path << "' not supported.\n"; } } std::string getSectionTypeStr(SectionType type) { switch (type) { case All: return "All"; case Cell: case Soma: return "soma"; case Axon: return "axon"; case Dendrite: return "dend"; case Apical: return "apic"; default: std::cerr << "SectionType not handled in getSectionTypeStr" << std::endl; nrn_abort(1); } } void 
register_sections_to_report(const SecMapping* sections, std::vector& to_report, double* report_variable, bool all_compartments) { for (const auto& section: sections->secmap) { // compartment_id int section_id = section.first; const auto& segment_ids = section.second; // get all compartment values (otherwise, just middle point) if (all_compartments) { for (const auto& segment_id: segment_ids) { // corresponding voltage in coreneuron voltage array double* variable = report_variable + segment_id; to_report.emplace_back(VarWithMapping(section_id, variable)); } } else { nrn_assert(segment_ids.size() % 2); // corresponding voltage in coreneuron voltage array const auto segment_id = segment_ids[segment_ids.size() / 2]; double* variable = report_variable + segment_id; to_report.emplace_back(VarWithMapping(section_id, variable)); } } } VarsToReport ReportHandler::get_section_vars_to_report(const NrnThread& nt, const std::vector& gids_to_report, double* report_variable, SectionType section_type, bool all_compartments) const { VarsToReport vars_to_report; const auto& section_type_str = getSectionTypeStr(section_type); const auto* mapinfo = static_cast(nt.mapping); if (!mapinfo) { std::cerr << "[COMPARTMENTS] Error : mapping information is missing for a Cell group " << nt.ncell << '\n'; nrn_abort(1); } for (const auto& gid: gids_to_report) { const auto& cell_mapping = mapinfo->get_cell_mapping(gid); if (cell_mapping == nullptr) { std::cerr << "[COMPARTMENTS] Error : Compartment mapping information is missing for gid " << gid << '\n'; nrn_abort(1); } std::vector to_report; to_report.reserve(cell_mapping->size()); if (section_type_str == "All") { const auto& section_mapping = cell_mapping->secmapvec; for (const auto& sections: section_mapping) { register_sections_to_report(sections, to_report, report_variable, all_compartments); } } else { /** get section list mapping for the type, if available */ if (cell_mapping->get_seclist_section_count(section_type_str) > 0) { const auto& 
sections = cell_mapping->get_seclist_mapping(section_type_str); register_sections_to_report(sections, to_report, report_variable, all_compartments); } } vars_to_report[gid] = to_report; } return vars_to_report; } VarsToReport ReportHandler::get_summation_vars_to_report( const NrnThread& nt, const std::vector& gids_to_report, const ReportConfiguration& report, const std::vector& nodes_to_gids) const { VarsToReport vars_to_report; const auto* mapinfo = static_cast(nt.mapping); auto& summation_report = nt.summation_report_handler_->summation_reports_[report.output_path]; if (!mapinfo) { std::cerr << "[COMPARTMENTS] Error : mapping information is missing for a Cell group " << nt.ncell << '\n'; nrn_abort(1); } for (const auto& gid: gids_to_report) { bool has_imembrane = false; // In case we need convertion of units int scale = 1; for (auto i = 0; i < report.mech_ids.size(); ++i) { auto mech_id = report.mech_ids[i]; auto var_name = report.var_names[i]; auto mech_name = report.mech_names[i]; if (mech_name != "i_membrane") { // need special handling for Clamp processes to flip the current value if (mech_name == "IClamp" || mech_name == "SEClamp") { scale = -1; } Memb_list* ml = nt._ml_list[mech_id]; if (!ml) { continue; } for (int j = 0; j < ml->nodecount; j++) { auto segment_id = ml->nodeindices[j]; if ((nodes_to_gids[ml->nodeindices[j]] == gid)) { double* var_value = get_var_location_from_var_name(mech_id, var_name.data(), ml, j); summation_report.currents_[segment_id].push_back( std::make_pair(var_value, scale)); } } } else { has_imembrane = true; } } const auto& cell_mapping = mapinfo->get_cell_mapping(gid); if (cell_mapping == nullptr) { std::cerr << "[SUMMATION] Error : Compartment mapping information is missing for gid " << gid << '\n'; nrn_abort(1); } std::vector to_report; to_report.reserve(cell_mapping->size()); summation_report.summation_.resize(nt.end); double* report_variable = summation_report.summation_.data(); const auto& section_type_str = 
getSectionTypeStr(report.section_type); if (report.section_type != SectionType::All) { if (cell_mapping->get_seclist_section_count(section_type_str) > 0) { const auto& sections = cell_mapping->get_seclist_mapping(section_type_str); register_sections_to_report(sections, to_report, report_variable, report.section_all_compartments); } } const auto& section_mapping = cell_mapping->secmapvec; for (const auto& sections: section_mapping) { for (auto& section: sections->secmap) { // compartment_id int section_id = section.first; auto& segment_ids = section.second; for (const auto& segment_id: segment_ids) { // corresponding voltage in coreneuron voltage array if (has_imembrane) { summation_report.currents_[segment_id].push_back( std::make_pair(nt.nrn_fast_imem->nrn_sav_rhs + segment_id, 1)); } if (report.section_type == SectionType::All) { double* variable = report_variable + segment_id; to_report.emplace_back(VarWithMapping(section_id, variable)); } else if (report.section_type == SectionType::Cell) { summation_report.gid_segments_[gid].push_back(segment_id); } } } } vars_to_report[gid] = to_report; } return vars_to_report; } VarsToReport ReportHandler::get_synapse_vars_to_report( const NrnThread& nt, const std::vector& gids_to_report, const ReportConfiguration& report, const std::vector& nodes_to_gids) const { VarsToReport vars_to_report; for (const auto& gid: gids_to_report) { // There can only be 1 mechanism nrn_assert(report.mech_ids.size() == 1); auto mech_id = report.mech_ids[0]; auto var_name = report.var_names[0]; Memb_list* ml = nt._ml_list[mech_id]; if (!ml) { continue; } std::vector to_report; to_report.reserve(ml->nodecount); for (int j = 0; j < ml->nodecount; j++) { double* is_selected = get_var_location_from_var_name(mech_id, SELECTED_VAR_MOD_NAME, ml, j); bool report_variable = false; /// if there is no variable in mod file then report on every compartment /// otherwise check the flag set in mod file if (is_selected == nullptr) { report_variable = true; } 
else { report_variable = *is_selected != 0.; } if ((nodes_to_gids[ml->nodeindices[j]] == gid) && report_variable) { double* var_value = get_var_location_from_var_name(mech_id, var_name.data(), ml, j); double* synapse_id = get_var_location_from_var_name(mech_id, SYNAPSE_ID_MOD_NAME, ml, j); nrn_assert(synapse_id && var_value); to_report.emplace_back(static_cast(*synapse_id), var_value); } } if (!to_report.empty()) { vars_to_report[gid] = to_report; } } return vars_to_report; } // map GIDs of every compartment, it consist in a backward sweep then forward sweep algorithm std::vector ReportHandler::map_gids(const NrnThread& nt) const { std::vector nodes_gid(nt.end, -1); // backward sweep: from presyn compartment propagate back GID to parent for (int i = 0; i < nt.n_presyn; i++) { const int gid = nt.presyns[i].gid_; const int thvar_index = nt.presyns[i].thvar_index_; // only for non artificial cells if (thvar_index >= 0) { // setting all roots gids of the presyns nodes, // index 0 have parent set to 0, so we must stop at j > 0 // also 0 is the parent of all, so it is an error to attribute a GID to it. nodes_gid[thvar_index] = gid; for (int j = thvar_index; j > 0; j = nt._v_parent_index[j]) { nodes_gid[nt._v_parent_index[j]] = gid; } } } // forward sweep: setting all compartements nodes to the GID of its root // already sets on above loop. This is working only because compartments are stored in order // parents follow by childrens for (int i = nt.ncell + 1; i < nt.end; i++) { nodes_gid[i] = nodes_gid[nt._v_parent_index[i]]; } return nodes_gid; } #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/report_handler.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once #include #include #include "nrnreport.hpp" #include "coreneuron/io/reports/report_event.hpp" #include "coreneuron/sim/multicore.hpp" namespace coreneuron { class ReportHandler { public: virtual ~ReportHandler() = default; virtual void create_report(ReportConfiguration& config, double dt, double tstop, double delay); #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) virtual void register_section_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, bool is_soma_target); virtual void register_custom_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report); VarsToReport get_section_vars_to_report(const NrnThread& nt, const std::vector& gids_to_report, double* report_variable, SectionType section_type, bool all_compartments) const; VarsToReport get_summation_vars_to_report(const NrnThread& nt, const std::vector& gids_to_report, const ReportConfiguration& report, const std::vector& nodes_to_gids) const; VarsToReport get_synapse_vars_to_report(const NrnThread& nt, const std::vector& gids_to_report, const ReportConfiguration& report, const std::vector& nodes_to_gids) const; std::vector map_gids(const NrnThread& nt) const; #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) protected: #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) std::vector> m_report_events; #endif // defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) }; } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/sonata_report_handler.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include "sonata_report_handler.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/io/nrnsection_mapping.hpp" #include "coreneuron/mechanism/mech_mapping.hpp" #ifdef ENABLE_SONATA_REPORTS #include "bbp/sonata/reports.h" #endif // ENABLE_SONATA_REPORTS namespace coreneuron { void SonataReportHandler::create_report(ReportConfiguration& config, double dt, double tstop, double delay) { #ifdef ENABLE_SONATA_REPORTS sonata_set_atomic_step(dt); #endif // ENABLE_SONATA_REPORTS ReportHandler::create_report(config, dt, tstop, delay); } #ifdef ENABLE_SONATA_REPORTS void SonataReportHandler::register_section_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, bool is_soma_target) { register_report(nt, config, vars_to_report); } void SonataReportHandler::register_custom_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report) { register_report(nt, config, vars_to_report); } std::pair SonataReportHandler::get_population_info(int gid) { if (m_spikes_info.population_info.empty()) { return std::make_pair("All", 0); } std::pair prev = m_spikes_info.population_info.front(); for (const auto& name_offset: m_spikes_info.population_info) { std::string pop_name = name_offset.first; int pop_offset = name_offset.second; if (pop_offset > gid) { break; } prev = name_offset; } return prev; } void SonataReportHandler::register_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report) { sonata_create_report(config.output_path.data(), config.start, config.stop, config.report_dt, config.unit.data(), config.type_str.data()); sonata_set_report_max_buffer_size_hint(config.output_path.data(), config.buffer_size); for (const auto& kv: vars_to_report) { uint64_t gid = kv.first; const std::vector& vars = kv.second; if (!vars.size()) 
continue; const auto& pop_info = get_population_info(gid); std::string population_name = pop_info.first; int population_offset = pop_info.second; sonata_add_node(config.output_path.data(), population_name.data(), population_offset, gid); sonata_set_report_max_buffer_size_hint(config.output_path.data(), config.buffer_size); for (const auto& variable: vars) { sonata_add_element(config.output_path.data(), population_name.data(), gid, variable.id, variable.var_value); } } } #endif // ENABLE_SONATA_REPORTS } // Namespace coreneuron ================================================ FILE: coreneuron/io/reports/sonata_report_handler.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include #include #include "report_handler.hpp" namespace coreneuron { class SonataReportHandler: public ReportHandler { public: SonataReportHandler(const SpikesInfo& spikes_info) : m_spikes_info(spikes_info) {} void create_report(ReportConfiguration& config, double dt, double tstop, double delay) override; #ifdef ENABLE_SONATA_REPORTS void register_section_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report, bool is_soma_target) override; void register_custom_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report) override; private: void register_report(const NrnThread& nt, const ReportConfiguration& config, const VarsToReport& vars_to_report); std::pair get_population_info(int gid); #endif // ENABLE_SONATA_REPORTS private: SpikesInfo m_spikes_info; }; } // Namespace coreneuron ================================================ FILE: coreneuron/io/setup_fornetcon.cpp ================================================ /* # 
============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/coreneuron.hpp" #include "coreneuron/io/setup_fornetcon.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include #include namespace coreneuron { /** If FOR_NETCON in use, setup NrnThread fornetcon related info. i.e NrnThread._fornetcon_perm_indices, NrnThread._fornetcon_weight_perm, and the relevant dparam element of each mechanism instance that uses a FOR_NETCONS statement. Makes use of nrn_fornetcon_cnt_, nrn_fornetcon_type_, and nrn_fornetcon_index_ that were specified during registration of mechanisms that use FOR_NETCONS. nrn_fornetcon_cnt_ is the number of mechanisms that use FOR_NETCONS, nrn_fornetcon_type_ is an int array of size nrn_fornetcon_cnt, that specifies the mechanism type. nrn_fornetcon_index_ is an int array of size nrn_fornetcon_cnt, that specifies the index into an instance's dparam int array having the fornetcon semantics. FOR_NETCONS (args) means to loop over all NetCon connecting to this target instance and args are the names of the items of each NetCon's weight vector (same as the enclosing NET_RECEIVE but possible different local names). NrnThread._weights is a vector of weight groups where the number of groups is the number of NetCon in this thread and each group has a size equal to the number of args in the target NET_RECEIVE block. The order of these groups is the NetCon Object order in HOC (the construction order). So the weight vector indices for the NetCons in the FOR_NETCONS loop are not adjacent. NrnThread._fornetcon_weight_perm is an index vector into the NrnThread._weight vector such that the list of indices that targets a mechanism instance are adjacent. 
NrnThread._fornetcon_perm_indices is an index vector into the NrnThread._fornetcon_weight_perm to the first of the list of NetCon weights that target the instance. The index of _fornetcon_perm_indices containing this first in the list is stored in the mechanism instances dparam at the dparam's semantic fornetcon slot. (Note that the next index points to the first index of the next target instance.) **/ static int* fornetcon_slot(const int mtype, const int instance, const int fnslot, const NrnThread& nt) { int layout = corenrn.get_mech_data_layout()[mtype]; int sz = corenrn.get_prop_dparam_size()[mtype]; Memb_list* ml = nt._ml_list[mtype]; int* fn = nullptr; if (layout == Layout::AoS) { fn = ml->pdata + (instance * sz + fnslot); } else if (layout == Layout::SoA) { int padded_cnt = nrn_soa_padded_size(ml->nodecount, layout); fn = ml->pdata + (fnslot * padded_cnt + instance); } return fn; } void setup_fornetcon_info(NrnThread& nt) { if (nrn_fornetcon_cnt_ == 0) { return; } // Mechanism types in use that have FOR_NETCONS statements // Nice to have the dparam fornetcon slot as well so use map // instead of set std::map type_to_slot; for (int i = 0; i < nrn_fornetcon_cnt_; ++i) { int type = nrn_fornetcon_type_[i]; Memb_list* ml = nt._ml_list[type]; if (ml && ml->nodecount) { type_to_slot[type] = nrn_fornetcon_index_[i]; } } if (type_to_slot.empty()) { return; } // How many NetCons (weight groups) are involved. // Also count how many weight groups for each target instance. // For the latter we can count in the dparam fornetcon slot. // zero the dparam fornetcon slot for counting and count number of slots. 
size_t n_perm_indices = 0; for (const auto& kv: type_to_slot) { int mtype = kv.first; int fnslot = kv.second; int nodecount = nt._ml_list[mtype]->nodecount; for (int i = 0; i < nodecount; ++i) { int* fn = fornetcon_slot(mtype, i, fnslot, nt); *fn = 0; n_perm_indices += 1; } } // Count how many weight groups for each slot and total number of weight groups size_t n_weight_perm = 0; for (int i = 0; i < nt.n_netcon; ++i) { NetCon& nc = nt.netcons[i]; int mtype = nc.target_->_type; auto search = type_to_slot.find(mtype); if (search != type_to_slot.end()) { int i_instance = nc.target_->_i_instance; int* fn = fornetcon_slot(mtype, i_instance, search->second, nt); *fn += 1; n_weight_perm += 1; } } // Displacement vector has an extra element since the number for last item // at n-1 is x[n] - x[n-1] and number for first is x[0] = 0. delete[] std::exchange(nt._fornetcon_perm_indices, nullptr); delete[] std::exchange(nt._fornetcon_weight_perm, nullptr); // Manual memory management because of needing to copy NrnThread to the GPU // and update device-side pointers there. Note the {} ensure the allocated // arrays are zero-initalised. nt._fornetcon_perm_indices_size = n_perm_indices + 1; nt._fornetcon_perm_indices = new size_t[nt._fornetcon_perm_indices_size]{}; nt._fornetcon_weight_perm_size = n_weight_perm; nt._fornetcon_weight_perm = new size_t[nt._fornetcon_weight_perm_size]{}; // From dparam fornetcon slots, compute displacement vector, and // set the dparam fornetcon slot to the index of the displacement vector // to allow later filling the _fornetcon_weight_perm. 
size_t i_perm_indices = 0; nt._fornetcon_perm_indices[0] = 0; for (const auto& kv: type_to_slot) { int mtype = kv.first; int fnslot = kv.second; int nodecount = nt._ml_list[mtype]->nodecount; for (int i = 0; i < nodecount; ++i) { int* fn = fornetcon_slot(mtype, i, fnslot, nt); nt._fornetcon_perm_indices[i_perm_indices + 1] = nt._fornetcon_perm_indices[i_perm_indices] + size_t(*fn); *fn = int(nt._fornetcon_perm_indices[i_perm_indices]); i_perm_indices += 1; } } // One more iteration over NetCon to fill in weight index for // nt._fornetcon_weight_perm. To help with this we increment the // dparam fornetcon slot on each use. for (int i = 0; i < nt.n_netcon; ++i) { NetCon& nc = nt.netcons[i]; int mtype = nc.target_->_type; auto search = type_to_slot.find(mtype); if (search != type_to_slot.end()) { int i_instance = nc.target_->_i_instance; int* fn = fornetcon_slot(mtype, i_instance, search->second, nt); size_t nc_w_index = size_t(nc.u.weight_index_); nt._fornetcon_weight_perm[size_t(*fn)] = nc_w_index; *fn += 1; // next item conceptually adjacent } } // Put back the proper values into the dparam fornetcon slot i_perm_indices = 0; for (const auto& kv: type_to_slot) { int mtype = kv.first; int fnslot = kv.second; int nodecount = nt._ml_list[mtype]->nodecount; for (int i = 0; i < nodecount; ++i) { int* fn = fornetcon_slot(mtype, i, fnslot, nt); *fn = int(i_perm_indices); i_perm_indices += 1; } } } } // namespace coreneuron ================================================ FILE: coreneuron/io/setup_fornetcon.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include "coreneuron/sim/multicore.hpp" namespace coreneuron { /** If FOR_NETCON in use, setup NrnThread fornetcon related info. 
**/ void setup_fornetcon_info(NrnThread& nt); } // namespace coreneuron ================================================ FILE: coreneuron/io/user_params.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once namespace coreneuron { class CheckPoints; /// This structure is data needed is several part of nrn_setup, phase1 and phase2. /// Before it was globals variables, group them to give them as a single argument. /// They have for the most part, nothing related to each other. struct UserParams { UserParams(int ngroup_, int* gidgroups_, const char* path_, const char* restore_path_, CheckPoints& checkPoints_) : ngroup(ngroup_) , gidgroups(gidgroups_) , path(path_) , restore_path(restore_path_) , file_reader(ngroup_) , checkPoints(checkPoints_) {} /// direct memory mode with neuron, do not open files /// Number of local cell groups const int ngroup; /// Array of cell group numbers (indices) const int* const gidgroups; /// path to dataset file const char* const path; /// Dataset path from where simulation is being restored const char* const restore_path; std::vector file_reader; CheckPoints& checkPoints; }; } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/capac.cpp ================================================ /*** THIS FILE IS AUTO GENERATED DONT MODIFY IT. ***/ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
/**
 * Add the capacitance contribution to the diagonal of the thread's matrix.
 *
 * For every instance of the mechanism, `cfac * cm` is added to
 * `_nt->_actual_d` at the instance's node index, with `cfac = .001 * _nt->cj`.
 *
 * The local names `vdata`, `_cntml_padded` and `_iml` must not be renamed:
 * the `cm` macro expands to `vdata[0 * _cntml_padded + _iml]`.
 *
 * \param _nt thread providing `cj` and the diagonal vector
 * \param ml  instances of the capacitance mechanism
 */
void nrn_jacob_capacitance(NrnThread* _nt, Memb_list* ml, int /* type */) {
    int _cntml_actual = ml->nodecount;
    int _cntml_padded = ml->_nodecount_padded;
    int _iml;
    double* vdata;
    double cfac = .001 * _nt->cj;
    (void) _cntml_padded; /* unused when layout=1*/

    double* _vec_d = _nt->_actual_d;

    { /*if (use_cachevec) {*/
        int* ni = ml->nodeindices;

        vdata = ml->data;
        // Offloaded only when the thread computes on the GPU; the arrays are
        // expected to be present on the device already.
        nrn_pragma_acc(parallel loop present(vdata [0:_cntml_padded * nparm],
                                             ni [0:_cntml_actual],
                                             _vec_d [0:_nt->end]) if (_nt->compute_gpu)
                           async(_nt->stream_id))
        nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
        for (_iml = 0; _iml < _cntml_actual; _iml++) {
            _vec_d[ni[_iml]] += cfac * cm;
        }
    }
}
/**
 * Save the right-hand side into `i_cap` and divide it by the capacitance.
 *
 * For every instance: `i_cap` receives the current value of `VEC_RHS` at the
 * instance's node, then that `VEC_RHS` entry is divided by `1.e-3 * cm`.
 *
 * The local names `vdata`, `_cntml_padded` and `_iml` must not be renamed:
 * `cm` and `i_cap` expand to `vdata[0 * _cntml_padded + _iml]` and
 * `vdata[1 * _cntml_padded + _iml]`.
 * NOTE(review): `VEC_RHS` is defined outside this file; presumably it reads
 * the thread's right-hand-side vector through `_nt` - confirm.
 *
 * \param _nt  thread (also referenced by the offload pragma macro)
 * \param ml   instances of the capacitance mechanism
 * \param type mechanism type, unused
 */
void nrn_div_capacity(NrnThread* _nt, Memb_list* ml, int type) {
    (void) type;
    int _cntml_actual = ml->nodecount;
    int _cntml_padded = ml->_nodecount_padded;
    int _iml;
    double* vdata;
    (void) _nt;
    (void) type;
    (void) _cntml_padded; /* unused */

    int* ni = ml->nodeindices;

    vdata = ml->data;
    _PRAGMA_FOR_INIT_ACC_LOOP_
    for (_iml = 0; _iml < _cntml_actual; _iml++) {
        // keep the undivided value before scaling the right-hand side
        i_cap = VEC_RHS(ni[_iml]);
        VEC_RHS(ni[_iml]) /= 1.e-3 * cm;
        // fprintf(stderr, "== nrn_div_cap: RHS[%d]=%.12f\n", ni[_iml], VEC_RHS(ni[_iml])) ;
    }
}
nrn_mul_capacity(NrnThread* _nt, Memb_list* ml, int type) { (void) type; int _cntml_actual = ml->nodecount; int _cntml_padded = ml->_nodecount_padded; int _iml; double* vdata; (void) _nt; (void) type; (void) _cntml_padded; /* unused */ int* ni = ml->nodeindices; const double cfac = .001 * _nt->cj; vdata = ml->data; _PRAGMA_FOR_INIT_ACC_LOOP_ for (_iml = 0; _iml < _cntml_actual; _iml++) { VEC_RHS(ni[_iml]) *= cfac * cm; } } } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/eion.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ /// THIS FILE IS AUTO GENERATED DONT MODIFY IT. #include #include #include "coreneuron/coreneuron.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mechanism/membfunc.hpp" #include "coreneuron/permute/data_layout.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #define _STRIDE _cntml_padded + _iml namespace coreneuron { // for each ion it refers to internal concentration, external concentration, and charge, const int ion_global_map_member_size = 3; #define nparm 5 static const char* mechanism[] = {/*just a template*/ "0", "na_ion", "ena", "nao", "nai", 0, "ina", "dina_dv_", 0, 0}; void nrn_init_ion(NrnThread*, Memb_list*, int); void nrn_alloc_ion(double*, Datum*, int); static int na_ion, k_ion, ca_ion; /* will get type for these special ions */ int nrn_is_ion(int type) { // Old: commented to remove dependency on memb_func and alloc function // return (memb_func[type].alloc == ion_alloc); return (type < nrn_ion_global_map_size // type smaller than largest ion's && nrn_ion_global_map[type] != nullptr); // allocated ion charge variables } int nrn_ion_global_map_size; double** nrn_ion_global_map; #define global_conci(type) 
nrn_ion_global_map[type][0] #define global_conco(type) nrn_ion_global_map[type][1] #define global_charge(type) nrn_ion_global_map[type][2] double nrn_ion_charge(int type) { return global_charge(type); } void ion_reg(const char* name, double valence) { char buf[7][50]; #define VAL_SENTINAL -10000. sprintf(buf[0], "%s_ion", name); sprintf(buf[1], "e%s", name); sprintf(buf[2], "%si", name); sprintf(buf[3], "%so", name); sprintf(buf[5], "i%s", name); sprintf(buf[6], "di%s_dv_", name); for (int i = 0; i < 7; i++) { mechanism[i + 1] = buf[i]; } mechanism[5] = nullptr; /* buf[4] not used above */ int mechtype = nrn_get_mechtype(buf[0]); if (mechtype >= nrn_ion_global_map_size || nrn_ion_global_map[mechtype] == nullptr) { // if hasn't yet been allocated // allocates mem for ion in ion_map and sets null all non-ion types if (nrn_ion_global_map_size <= mechtype) { int size = mechtype + 1; nrn_ion_global_map = (double**) erealloc(nrn_ion_global_map, sizeof(double*) * size); for (int i = nrn_ion_global_map_size; i < mechtype; i++) { nrn_ion_global_map[i] = nullptr; } nrn_ion_global_map_size = mechtype + 1; } nrn_ion_global_map[mechtype] = (double*) emalloc(ion_global_map_member_size * sizeof(double)); register_mech((const char**) mechanism, nrn_alloc_ion, nrn_cur_ion, nullptr, nullptr, nrn_init_ion, nullptr, nullptr, -1, 1); mechtype = nrn_get_mechtype(mechanism[1]); _nrn_layout_reg(mechtype, SOA_LAYOUT); hoc_register_prop_size(mechtype, nparm, 1); hoc_register_dparam_semantics(mechtype, 0, "iontype"); nrn_writes_conc(mechtype, 1); { // See https://en.cppreference.com/w/cpp/io/c/fprintf: If a call to // sprintf or snprintf causes copying to take place between objects // that overlap, the behavior is undefined. 
std::string const old_buf_0{buf[0]}; sprintf(buf[0], "%si0_%s", name, old_buf_0.c_str()); } sprintf(buf[1], "%so0_%s", name, buf[0]); if (strcmp("na", name) == 0) { na_ion = mechtype; global_conci(mechtype) = DEF_nai; global_conco(mechtype) = DEF_nao; global_charge(mechtype) = 1.; } else if (strcmp("k", name) == 0) { k_ion = mechtype; global_conci(mechtype) = DEF_ki; global_conco(mechtype) = DEF_ko; global_charge(mechtype) = 1.; } else if (strcmp("ca", name) == 0) { ca_ion = mechtype; global_conci(mechtype) = DEF_cai; global_conco(mechtype) = DEF_cao; global_charge(mechtype) = 2.; } else { global_conci(mechtype) = DEF_ioni; global_conco(mechtype) = DEF_iono; global_charge(mechtype) = VAL_SENTINAL; } } double val = global_charge(mechtype); if (valence != VAL_SENTINAL && val != VAL_SENTINAL && valence != val) { fprintf(stderr, "%s ion valence defined differently in\n\ two USEION statements (%g and %g)\n", buf[0], valence, global_charge(mechtype)); nrn_exit(1); } else if (valence == VAL_SENTINAL && val == VAL_SENTINAL) { fprintf(stderr, "%s ion valence must be defined in\n\ the USEION statement of any model using this ion\n", buf[0]); nrn_exit(1); } else if (valence != VAL_SENTINAL) { global_charge(mechtype) = valence; } } #if VECTORIZE #define erev pd[0 * _STRIDE] /* From Eion */ #define conci pd[1 * _STRIDE] #define conco pd[2 * _STRIDE] #define cur pd[3 * _STRIDE] #define dcurdv pd[4 * _STRIDE] /* handle erev, conci, conc0 "in the right way" according to ion_style default. See nrn/lib/help/nrnoc.help. ion_style("name_ion", [c_style, e_style, einit, eadvance, cinit]) ica is assigned eca is parameter but if conc exists then eca is assigned if conc is nrnocCONST then eca calculated on finitialize if conc is STATE then eca calculated on fadvance and conc finitialize with global nai0, nao0 nernst(ci, co, charge) and ghk(v, ci, co, charge) available to hoc and models. 
/**
 * Reset the ion current and its derivative for every ion instance.
 * Must be called prior to any channels which update the currents.
 *
 * For each instance, `dcurdv` and `cur` are set to zero. If bit 0100 of the
 * instance's `iontype` is set ("calc reversal during fadvance"), `erev` is
 * recomputed with nrn_nernst() from the instance's concentrations, the global
 * charge of the ion type and `celsius`.
 *
 * The local names `pd`, `ppd`, `_cntml_padded`, `_iml` and the parameter
 * `type` must not be renamed: the `erev`, `conci`, `conco`, `cur`, `dcurdv`,
 * `iontype` and `charge` macros expand to expressions using them.
 *
 * \param nt   thread; read by the offload pragmas (`compute_gpu`, `stream_id`)
 * \param ml   instances of the ion mechanism
 * \param type mechanism type of the ion, used to look up its global charge
 */
void nrn_cur_ion(NrnThread* nt, Memb_list* ml, int type) {
    int _cntml_actual = ml->nodecount;
    double* pd;
    Datum* ppd;
    (void) nt; /* unused */
    /*printf("ion_cur %s\n", memb_func[type].sym->name);*/
    int _cntml_padded = ml->_nodecount_padded;
    pd = ml->data;
    ppd = ml->pdata;
    // clang-format off
    nrn_pragma_acc(parallel loop present(pd[0:_cntml_padded * 5],
                                         ppd[0:_cntml_actual],
                                         nrn_ion_global_map[0:nrn_ion_global_map_size]
                                                           [0:ion_global_map_member_size])
                                 if (nt->compute_gpu)
                                 async(nt->stream_id))
    // clang-format on
    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
    for (int _iml = 0; _iml < _cntml_actual; ++_iml) {
        dcurdv = 0.;
        cur = 0.;
        // 0100: reversal potential is recalculated on every step
        if (iontype & 0100) {
            erev = nrn_nernst(conci, conco, charge, celsius);
        }
    };
}
/**
 * Second-order correction of the ion currents.
 *
 * Only acts when `secondorder == 2`. For every ion mechanism in the thread's
 * mechanism list (as decided by nrn_is_ion()), each instance's `cur` is
 * incremented by `dcurdv` times the thread's right-hand-side value at the
 * instance's node.
 *
 * The local names `pd`, `_cntml_padded` and `_iml` must not be renamed: the
 * `cur` and `dcurdv` macros expand to `pd[3 * _cntml_padded + _iml]` and
 * `pd[4 * _cntml_padded + _iml]`.
 *
 * \param _nt         thread whose ion mechanisms are updated
 * \param secondorder integration-order setting; anything other than 2 is a no-op
 */
void second_order_cur(NrnThread* _nt, int secondorder) {
    int _cntml_padded;
    double* pd;
    (void) _nt; /* unused */
    double* _vec_rhs = _nt->_actual_rhs;

    if (secondorder == 2) {
        for (NrnThreadMembList* tml = _nt->tml; tml; tml = tml->next)
            if (nrn_is_ion(tml->index)) {
                Memb_list* ml = tml->ml;
                int _cntml_actual = ml->nodecount;
                int* ni = ml->nodeindices;
                _cntml_padded = ml->_nodecount_padded;
                pd = ml->data;
                // Offloaded only when the thread computes on the GPU.
                nrn_pragma_acc(parallel loop present(pd [0:_cntml_padded * 5],
                                                     ni [0:_cntml_actual],
                                                     _vec_rhs [0:_nt->end]) if (_nt->compute_gpu)
                                   async(_nt->stream_id))
                nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
                for (int _iml = 0; _iml < _cntml_actual; ++_iml) {
                    cur += dcurdv * (_vec_rhs[ni[_iml]]);
                }
            }
    }
}
#pragma once namespace coreneuron { extern int nrn_is_ion(int); extern void ion_reg(const char*, double); } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/mech/cfile/cabvars.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ namespace coreneuron { extern void capacitance_reg(void), _passive_reg(void), #if EXTRACELLULAR extracell_reg_(void), #endif _stim_reg(void), _hh_reg(void), _netstim_reg(void), _expsyn_reg(void), _exp2syn_reg(void), _svclmp_reg(void); static void (*mechanism[])(void) = {/* type will start at 3 */ capacitance_reg, _passive_reg, #if EXTRACELLULAR /* extracellular requires special handling and must be type 5 */ extracell_reg_, #endif nullptr}; } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/mech/enginemech.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ /** * \file * \brief Provides interface function for CoreNEURON mechanism library and NEURON * * libcorenrnmech is a interface library provided to building standalone executable * special-core. Also, it is used by NEURON to run CoreNEURON via dlopen to execute * models via in-memory transfer. */ #include #include namespace coreneuron { /** Mechanism registration function * * If external mechanisms present then use modl_reg function generated * in mod_func.cpp otherwise use empty one. 
*/
#ifdef ADDITIONAL_MECHS
extern void modl_reg();
#else
void modl_reg() {}
#endif

/// variables defined in coreneuron library
extern bool nrn_have_gaps;
extern bool nrn_use_fast_imem;

/// function defined in coreneuron library
extern void nrn_cleanup_ion_map();
}  // namespace coreneuron

/** Initialize mechanisms and run simulation using CoreNEURON
 *
 *  This is mainly used to build nrniv-core executable
 *
 *  Order matters: the built-in mechanisms are set up first (mk_mech_init),
 *  then the additional ones are registered (modl_reg), then the simulation
 *  runs, and finally the ion map is released.
 *
 *  @param argc Number of command line arguments
 *  @param argv Command line arguments
 *  @return The value returned by run_solve_core()
 */
int solve_core(int argc, char** argv) {
    mk_mech_init(argc, argv);
    coreneuron::modl_reg();
    int ret = run_solve_core(argc, argv);
    coreneuron::nrn_cleanup_ion_map();
    return ret;
}

extern "C" {

/// global variables from coreneuron library
extern bool corenrn_embedded;
extern int corenrn_embedded_nthread;

/// parse arguments from neuron and prepare new one for coreneuron
char* prepare_args(int& argc, char**& argv, int use_mpi, const char* mpi_lib, const char* nrn_arg);

/// initialize standard mechanisms from coreneuron
void mk_mech_init(int argc, char** argv);

/// set openmp threads equal to neuron's pthread
void set_openmp_threads(int nthread);

/** Run CoreNEURON in embedded mode with NEURON
 *
 *  @param nthread Number of Pthreads on NEURON side
 *  @param have_gaps True if gap junctions are used
 *  @param use_mpi True if MPI is used on NEURON side
 *  @param use_fast_imem True if fast membrane current (imem) calculation is enabled
 *  @param mpi_lib Forwarded unchanged to prepare_args()
 *                 (presumably identifies the MPI library to use; confirm with prepare_args)
 *  @param nrn_arg Command line arguments passed by NEURON
 *  @return 1 if embedded mode is used otherwise 0
 *
 *  \todo Change return type semantics
 */
int corenrn_embedded_run(int nthread,
                         int have_gaps,
                         int use_mpi,
                         int use_fast_imem,
                         const char* mpi_lib,
                         const char* nrn_arg) {
    // set coreneuron's internal variable based on neuron arguments
    corenrn_embedded = true;
    corenrn_embedded_nthread = nthread;
    coreneuron::nrn_have_gaps = have_gaps != 0;
    coreneuron::nrn_use_fast_imem = use_fast_imem != 0;

    // set number of openmp threads
    set_openmp_threads(nthread);

    // pre-process arguments from neuron and prepare new ones for coreneuron;
    // prepare_args fills argc/argv and returns the backing string
    int argc;
    char** argv;
    char* new_arg = prepare_args(argc, argv, use_mpi, mpi_lib, nrn_arg);

    // initialize internal arguments
    mk_mech_init(argc, argv);

    // register the additional mechanisms built into special-core; the static
    // flag makes sure this happens only on the first call in this process
    static bool modl_reg_called = false;
    if (!modl_reg_called) {
        coreneuron::modl_reg();
        modl_reg_called = true;
    }
    // run simulation
    run_solve_core(argc, argv);

    // free temporary string created from prepare_args
    free(new_arg);

    // delete array for argv
    delete[] argv;

    return corenrn_embedded ? 1 : 0;
}
}

================================================
FILE: coreneuron/mechanism/mech/mod2c_core_thread.hpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/

#pragma once

#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/mechanism/mechanism.hpp"
#include "coreneuron/utils/offload.hpp"

namespace coreneuron {

/* Deliberately NOT parenthesised: it is written as `k * _STRIDE`, which then
 * expands to `k * _cntml_padded + _iml`. Adding parentheses would change the
 * computed index. */
#define _STRIDE _cntml_padded + _iml

/* Argument lists forwarded between generated mechanism functions: the
 * "proto" variants are for declarations, the others for calls, and the
 * "comma" variants carry a trailing comma so further arguments can follow. */
#define _threadargscomma_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _ml, _v,
#define _threadargsprotocomma_                                                                    \
    int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \
        Memb_list *_ml, double _v,
#define _threadargs_ _iml, _cntml_padded, _p, _ppvar, _thread, _nt, _ml, _v
#define _threadargsproto_                                                                         \
    int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \
        Memb_list *_ml, double _v

/* One matrix element, linked to its neighbours in the same row and column. */
struct Elm {
    unsigned row;        /* Row location */
    unsigned col;        /* Column location */
    double* value;       /* The value SOA  _cntml_padded of them*/
    struct Elm* r_up;    /* Link to element in same column */
    struct Elm* r_down;  /*       in solution order */
    struct Elm* c_left;  /* Link to left element in same row */
    struct Elm* c_right; /*       in solution order (see getelm) */
};

/* Node of a doubly linked list of rows, ordered by `norder`. */
struct Item {
    Elm* elm{};
    unsigned norder{}; /* order of a row */
    Item* next{};
    Item* prev{};
};
using List = Item; /* list of mixed items */ struct SparseObj { /* all the state information */ Elm** rowst{}; /* link to first element in row (solution order)*/ Elm** diag{}; /* link to pivot element in row (solution order)*/ void* elmpool{}; /* no interthread cache line sharing for elements */ unsigned neqn{}; /* number of equations */ unsigned _cntml_padded{}; /* number of instances */ unsigned* varord{}; /* row and column order for pivots */ double* rhs{}; /* initially- right hand side finally - answer */ unsigned* ngetcall{}; /* per instance counter for number of calls to _getelm */ int phase{}; /* 0-solution phase; 1-count phase; 2-build list phase */ int numop{}; unsigned coef_list_size{}; double** coef_list{}; /* pointer to (first instance) value in _getelm order */ /* don't really need the rest */ int nroworder{}; /* just for freeing */ Item** roworder{}; /* roworder[i] is pointer to order item for row i. Does not have to be in orderlist */ List* orderlist{}; /* list of rows sorted by norder that haven't been used */ int do_flag{}; }; extern void _nrn_destroy_sparseobj_thread(SparseObj* so); // derived from nrn/src/scopmath/euler.c // updated for aos/soa layout index template int euler_thread(int neqn, int* var, int* der, F fun, _threadargsproto_) { double const dt{_nt->_dt}; /* calculate the derivatives */ fun(_threadargs_); // std::invoke in C++17 /* update dependent variables */ for (int i = 0; i < neqn; i++) { _p[var[i] * _STRIDE] += dt * (_p[der[i] * _STRIDE]); } return 0; } template int derivimplicit_thread(int n, int* slist, int* dlist, F fun, _threadargsproto_) { fun(_threadargs_); // std::invoke in C++17 return 0; } void nrn_sparseobj_copyto_device(SparseObj* so); void nrn_sparseobj_delete_from_device(SparseObj* so); } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/mech/mod_func.c.pl ================================================ #!/usr/bin/perl # # 
============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= #Construct the modl_reg() function from a provided list #of modules. #Usage : mod_func.c.pl[MECH1.mod MECH2.mod...] @mods = @ARGV; s/\.mod$// foreach @mods; @mods=sort @mods; if(!@mods) { print STDERR "mod_func.c.pl: No mod files provided"; print "// No mod files provided namespace coreneuron { void modl_reg() {} } "; exit 0; } print << "__eof"; #include namespace coreneuron { extern int nrnmpi_myid; extern int nrn_nobanner_; extern int @{[join ",\n ", map{"_${_}_reg(void)"} @mods]}; void modl_reg() { if (!nrn_nobanner_ && nrnmpi_myid < 1) { fprintf(stderr, " Additional mechanisms from files\\n"); @{[join "\n ", map{"fprintf(stderr, \" $_.mod\");"} @mods] } fprintf(stderr, "\\n\\n"); } @{[join "\n ", map{"_${_}_reg();"} @mods] } } } //namespace coreneuron __eof ================================================ FILE: coreneuron/mechanism/mech/modfile/exp2syn.mod ================================================ COMMENT Two state kinetic scheme synapse described by rise time tau1, and decay time constant tau2. The normalized peak condunductance is 1. Decay time MUST be greater than rise time. The solution of A->G->bath with rate constants 1/tau1 and 1/tau2 is A = a*exp(-t/tau1) and G = a*tau2/(tau2-tau1)*(-exp(-t/tau1) + exp(-t/tau2)) where tau1 < tau2 If tau2-tau1 is very small compared to tau1, this is an alphasynapse with time constant tau2. If tau1/tau2 is very small, this is single exponential decay with time constant tau2. The factor is evaluated in the initial block such that an event of weight 1 generates a peak conductance of 1. Because the solution is a sum of exponentials, the coupled equations can be solved as a pair of independent equations by the more efficient cnexp method. 
ENDCOMMENT NEURON { POINT_PROCESS Exp2Syn RANGE tau1, tau2, e, i NONSPECIFIC_CURRENT i RANGE g } UNITS { (nA) = (nanoamp) (mV) = (millivolt) (uS) = (microsiemens) } PARAMETER { tau1 = 0.1 (ms) <1e-9,1e9> tau2 = 10 (ms) <1e-9,1e9> e=0 (mV) } ASSIGNED { v (mV) i (nA) g (uS) factor } STATE { A (uS) B (uS) } INITIAL { LOCAL tp if (tau1/tau2 > 0.9999) { tau1 = 0.9999*tau2 } if (tau1/tau2 < 1e-9) { tau1 = tau2*1e-9 } A = 0 B = 0 tp = (tau1*tau2)/(tau2 - tau1) * log(tau2/tau1) factor = -exp(-tp/tau1) + exp(-tp/tau2) factor = 1/factor } BREAKPOINT { SOLVE state METHOD cnexp g = B - A i = g*(v - e) } DERIVATIVE state { A' = -A/tau1 B' = -B/tau2 } NET_RECEIVE(weight (uS)) { A = A + weight*factor B = B + weight*factor } ================================================ FILE: coreneuron/mechanism/mech/modfile/expsyn.mod ================================================ NEURON { POINT_PROCESS ExpSyn RANGE tau, e, i NONSPECIFIC_CURRENT i } UNITS { (nA) = (nanoamp) (mV) = (millivolt) (uS) = (microsiemens) } PARAMETER { tau = 0.1 (ms) <1e-9,1e9> e = 0 (mV) } ASSIGNED { v (mV) i (nA) } STATE { g (uS) } INITIAL { g=0 } BREAKPOINT { SOLVE state METHOD cnexp i = g*(v - e) } DERIVATIVE state { g' = -g/tau } NET_RECEIVE(weight (uS)) { g = g + weight } ================================================ FILE: coreneuron/mechanism/mech/modfile/hh.mod ================================================ TITLE hh.mod squid sodium, potassium, and leak channels COMMENT This is the original Hodgkin-Huxley treatment for the set of sodium, potassium, and leakage channels found in the squid giant axon membrane. ("A quantitative description of membrane current and its application conduction and excitation in nerve" J.Physiol. (Lond.) 117:500-544 (1952).) Membrane voltage is in absolute mV and has been reversed in polarity from the original HH convention and shifted to reflect a resting potential of -65 mV. Remember to set celsius=6.3 (or whatever) in your HOC file. 
See squid.hoc for an example of a simulation using this model. SW Jaslove 6 March, 1992 ENDCOMMENT UNITS { (mA) = (milliamp) (mV) = (millivolt) (S) = (siemens) } ? interface NEURON { SUFFIX hh USEION na READ ena WRITE ina USEION k READ ek WRITE ik NONSPECIFIC_CURRENT il RANGE gnabar, gkbar, gl, el, gna, gk :GLOBAL minf, hinf, ninf, mtau, htau, ntau RANGE minf, hinf, ninf, mtau, htau, ntau THREADSAFE : assigned GLOBALs will be per thread } PARAMETER { gnabar = .12 (S/cm2) <0,1e9> gkbar = .036 (S/cm2) <0,1e9> gl = .0003 (S/cm2) <0,1e9> el = -54.3 (mV) } STATE { m h n } ASSIGNED { v (mV) celsius (degC) ena (mV) ek (mV) gna (S/cm2) gk (S/cm2) ina (mA/cm2) ik (mA/cm2) il (mA/cm2) minf hinf ninf mtau (ms) htau (ms) ntau (ms) } ? currents BREAKPOINT { SOLVE states METHOD cnexp gna = gnabar*m*m*m*h ina = gna*(v - ena) gk = gkbar*n*n*n*n ik = gk*(v - ek) il = gl*(v - el) } INITIAL { rates(v) m = minf h = hinf n = ninf } ? states DERIVATIVE states { rates(v) m' = (minf-m)/mtau h' = (hinf-h)/htau n' = (ninf-n)/ntau } :LOCAL q10 ? rates PROCEDURE rates(v(mV)) { :Computes rate and other constants at current v. :Call once from HOC to initialize inf at resting v. LOCAL alpha, beta, sum, q10 : TABLE minf, mtau, hinf, htau, ninf, ntau DEPEND celsius FROM -100 TO 100 WITH 200 UNITSOFF q10 = 3^((celsius - 6.3)/10) :"m" sodium activation system alpha = .1 * vtrap(-(v+40),10) beta = 4 * exp(-(v+65)/18) sum = alpha + beta mtau = 1/(q10*sum) minf = alpha/sum :"h" sodium inactivation system alpha = .07 * exp(-(v+65)/20) beta = 1 / (exp(-(v+35)/10) + 1) sum = alpha + beta htau = 1/(q10*sum) hinf = alpha/sum :"n" potassium activation system alpha = .01*vtrap(-(v+55),10) beta = .125*exp(-(v+65)/80) sum = alpha + beta ntau = 1/(q10*sum) ninf = alpha/sum } FUNCTION vtrap(x,y) { :Traps for 0 in denominator of rate eqns. 
if (fabs(x/y) < 1e-6) { vtrap = y*(1 - x/y/2) }else{ vtrap = x/(exp(x/y) - 1) } } UNITSON ================================================ FILE: coreneuron/mechanism/mech/modfile/netstim.mod ================================================ : $Id: netstim.mod 2212 2008-09-08 14:32:26Z hines $ : comments at end : the Random idiom has been extended to support CoreNEURON. : For backward compatibility, noiseFromRandom(hocRandom) can still be used : as well as the default low-quality scop_exprand generator. : However, CoreNEURON will not accept usage of the low-quality generator, : and, if noiseFromRandom is used to specify the random stream, that stream : must be using the Random123 generator. : The recommended idiom for specfication of the random stream is to use : noiseFromRandom123(id1, id2[, id3]) : If any instance uses noiseFromRandom123, then no instance can use noiseFromRandom : and vice versa. NEURON { ARTIFICIAL_CELL NetStim RANGE interval, number, start RANGE noise THREADSAFE : only true if every instance has its own distinct Random BBCOREPOINTER donotuse } PARAMETER { interval = 10 (ms) <1e-9,1e9>: time between spikes (msec) number = 10 <0,1e9> : number of spikes (independent of noise) start = 50 (ms) : start of first spike noise = 0 <0,1> : amount of randomness (0.0 - 1.0) } ASSIGNED { event (ms) on ispike donotuse } VERBATIM #if NRNBBCORE /* running in CoreNEURON */ #define IFNEWSTYLE(arg) arg #else /* running in NEURON */ /* 1 means noiseFromRandom was called when _ran_compat was previously 0 . 2 means noiseFromRandom123 was called when _ran_compat was previously 0. 
*/ static int _ran_compat; /* specifies the noise style for all instances */ #define IFNEWSTYLE(arg) if(_ran_compat == 2) { arg } #endif /* running in NEURON */ ENDVERBATIM :backward compatibility PROCEDURE seed(x) { VERBATIM #if !NRNBBCORE ENDVERBATIM set_seed(x) VERBATIM #endif ENDVERBATIM } INITIAL { VERBATIM if (_p_donotuse) { /* only this style initializes the stream on finitialize */ IFNEWSTYLE(nrnran123_setseq((nrnran123_State*)_p_donotuse, 0, 0);) } ENDVERBATIM on = 0 : off ispike = 0 if (noise < 0) { noise = 0 } if (noise > 1) { noise = 1 } if (start >= 0 && number > 0) { on = 1 : randomize the first spike so on average it occurs at : start + noise*interval event = start + invl(interval) - interval*(1. - noise) : but not earlier than 0 if (event < 0) { event = 0 } net_send(event, 3) } } PROCEDURE init_sequence(t(ms)) { if (number > 0) { on = 1 event = 0 ispike = 0 } } FUNCTION invl(mean (ms)) (ms) { if (mean <= 0.) { mean = .01 (ms) : I would worry if it were 0. } if (noise == 0) { invl = mean }else{ invl = (1. - noise)*mean + noise*mean*erand() } } VERBATIM #include "nrnran123.h" #if !NRNBBCORE /* backward compatibility */ double nrn_random_pick(void* r); void* nrn_random_arg(int argpos); int nrn_random_isran123(void* r, uint32_t* id1, uint32_t* id2, uint32_t* id3); int nrn_random123_setseq(void* r, uint32_t seq, char which); int nrn_random123_getseq(void* r, uint32_t* seq, char* which); #endif ENDVERBATIM FUNCTION erand() { VERBATIM if (_p_donotuse) { /* :Supports separate independent but reproducible streams for : each instance. However, the corresponding hoc Random : distribution MUST be set to Random.negexp(1) */ #if !NRNBBCORE if (_ran_compat == 2) { _lerand = nrnran123_negexp((nrnran123_State*)_p_donotuse); }else{ _lerand = nrn_random_pick(_p_donotuse); } #else _lerand = nrnran123_negexp((nrnran123_State*)_p_donotuse); #endif return _lerand; }else{ #if NRNBBCORE assert(0); #else /* : the old standby. 
Cannot use if reproducible parallel sim : independent of nhost or which host this instance is on : is desired, since each instance on this cpu draws from : the same stream */ #endif } #if !NRNBBCORE ENDVERBATIM erand = exprand(1) VERBATIM #endif ENDVERBATIM } PROCEDURE noiseFromRandom() { VERBATIM #if !NRNBBCORE { void** pv = (void**)(&_p_donotuse); if (_ran_compat == 2) { fprintf(stderr, "NetStim.noiseFromRandom123 was previously called\n"); assert(0); } _ran_compat = 1; if (ifarg(1)) { *pv = nrn_random_arg(1); }else{ *pv = (void*)0; } } #endif ENDVERBATIM } PROCEDURE noiseFromRandom123() { VERBATIM #if !NRNBBCORE { nrnran123_State** pv = (nrnran123_State**)(&_p_donotuse); if (_ran_compat == 1) { fprintf(stderr, "NetStim.noiseFromRandom was previously called\n"); assert(0); } _ran_compat = 2; if (*pv) { nrnran123_deletestream(*pv); *pv = (nrnran123_State*)0; } if (ifarg(3)) { *pv = nrnran123_newstream3((uint32_t)*getarg(1), (uint32_t)*getarg(2), (uint32_t)*getarg(3)); }else if (ifarg(2)) { *pv = nrnran123_newstream((uint32_t)*getarg(1), (uint32_t)*getarg(2)); } } #endif ENDVERBATIM } DESTRUCTOR { VERBATIM if (!noise) { return; } if (_p_donotuse) { #if NRNBBCORE { /* but note that mod2c does not translate DESTRUCTOR */ #else if (_ran_compat == 2) { #endif nrnran123_State** pv = (nrnran123_State**)(&_p_donotuse); nrnran123_deletestream(*pv); *pv = (nrnran123_State*)0; } } ENDVERBATIM } VERBATIM static void bbcore_write(double* x, int* d, int* xx, int *offset, _threadargsproto_) { if (!noise) { return; } /* error if using the legacy scop_exprand */ if (!_p_donotuse) { fprintf(stderr, "NetStim: cannot use the legacy scop_negexp generator for the random stream.\n"); assert(0); } if (d) { char which; uint32_t* di = ((uint32_t*)d) + *offset; #if !NRNBBCORE if (_ran_compat == 1) { void** pv = (void**)(&_p_donotuse); /* error if not using Random123 generator */ if (!nrn_random_isran123(*pv, di, di+1, di+2)) { fprintf(stderr, "NetStim: Random123 generator is required\n"); 
assert(0); } nrn_random123_getseq(*pv, di+3, &which); di[4] = (int)which; }else{ #else { #endif nrnran123_State** pv = (nrnran123_State**)(&_p_donotuse); nrnran123_getids3(*pv, di, di+1, di+2); nrnran123_getseq(*pv, di+3, &which); di[4] = (int)which; #if NRNBBCORE /* CORENeuron does not call DESTRUCTOR so... */ nrnran123_deletestream(*pv); *pv = (nrnran123_State*)0; #endif } /*printf("Netstim bbcore_write %d %d %d\n", di[0], di[1], di[3]);*/ } *offset += 5; } static void bbcore_read(double* x, int* d, int* xx, int* offset, _threadargsproto_) { if (!noise) { return; } /* Generally, CoreNEURON, in the context of psolve, begins with an empty model so this call takes place in the context of a freshly created instance and _p_donotuse is not NULL. However, this function is also now called from NEURON at the end of coreneuron psolve in order to transfer back the nrnran123 sequence state. That allows continuation with a subsequent psolve within NEURON or properly transfer back to CoreNEURON if we continue the psolve there. So now, extra logic is needed for this call to work in a NEURON context. 
*/ uint32_t* di = ((uint32_t*)d) + *offset; #if NRNBBCORE nrnran123_State** pv = (nrnran123_State**)(&_p_donotuse); assert(!_p_donotuse); *pv = nrnran123_newstream3(di[0], di[1], di[2]); nrnran123_setseq(*pv, di[3], (char)di[4]); #else uint32_t id1, id2, id3; assert(_p_donotuse); if (_ran_compat == 1) { /* Hoc Random.Random123 */ void** pv = (void**)(&_p_donotuse); int b = nrn_random_isran123(*pv, &id1, &id2, &id3); assert(b); nrn_random123_setseq(*pv, di[3], (char)di[4]); }else{ assert(_ran_compat == 2); nrnran123_State** pv = (nrnran123_State**)(&_p_donotuse); nrnran123_getids3(*pv, &id1, &id2, &id3); nrnran123_setseq(*pv, di[3], (char)di[4]); } /* Random123 on NEURON side has same ids as on CoreNEURON side */ assert(di[0] == id1 && di[1] == id2 && di[2] == id3); #endif *offset += 5; } ENDVERBATIM PROCEDURE next_invl() { if (number > 0) { event = invl(interval) } if (ispike >= number) { on = 0 } } NET_RECEIVE (w) { if (flag == 0) { : external event if (w > 0 && on == 0) { : turn on spike sequence : but not if a netsend is on the queue init_sequence(t) : randomize the first spike so on average it occurs at : noise*interval (most likely interval is always 0) next_invl() event = event - interval*(1. - noise) net_send(event, 1) }else if (w < 0) { : turn off spiking definitively on = 0 } } if (flag == 3) { : from INITIAL if (on == 1) { : but ignore if turned off by external event init_sequence(t) net_send(0, 1) } } if (flag == 1 && on == 1) { ispike = ispike + 1 net_event(t) next_invl() if (on == 1) { net_send(event, 1) } } } FUNCTION bbsavestate() { bbsavestate = 0 : limited to noiseFromRandom123 VERBATIM #if !NRNBBCORE if (_ran_compat == 2) { nrnran123_State** pv = (nrnran123_State**)(&_p_donotuse); if (!*pv) { return 0.0; } char which; uint32_t seq; double *xdir, *xval; xdir = hoc_pgetarg(1); if (*xdir == -1.) { *xdir = 2; return 0.0; } xval = hoc_pgetarg(2); if (*xdir == 0.) 
{ nrnran123_getseq(*pv, &seq, &which); xval[0] = (double)seq; xval[1] = (double)which; } if (*xdir == 1) { nrnran123_setseq(*pv, (uint32_t)xval[0], (char)xval[1]); } } /* else do nothing */ #endif ENDVERBATIM } COMMENT Presynaptic spike generator --------------------------- This mechanism has been written to be able to use synapses in a single neuron receiving various types of presynaptic trains. This is a "fake" presynaptic compartment containing a spike generator. The trains of spikes can be either periodic or noisy (Poisson-distributed). Parameters: noise: between 0 (no noise-periodic) and 1 (fully noisy) interval: mean time between spikes (ms) number: number of spikes (independent of noise) Written by Z. Mainen, modified by A. Destexhe, The Salk Institute Modified by Michael Hines for use with CVode The intrinsic bursting parameters have been removed since generators can stimulate other generators to create complicated bursting patterns with independent statistics (see below) Modified by Michael Hines to use logical event style with NET_RECEIVE This stimulator can also be triggered by an input event. If the stimulator is in the on==0 state (no net_send events on queue) and receives a positive weight event, then the stimulator changes to the on==1 state and goes through its entire spike sequence before changing to the on==0 state. During that time it ignores any positive weight events. If, in an on!=0 state, the stimulator receives a negative weight event, the stimulator will change to the on==0 state. In the on==0 state, it will ignore any arriving net_send events. A change to the on==1 state immediately fires the first spike of its sequence.
ENDCOMMENT

================================================
FILE: coreneuron/mechanism/mech/modfile/passive.mod
================================================

TITLE passive membrane channel

UNITS {
    (mV) = (millivolt)
    (mA) = (milliamp)
    (S) = (siemens)
}

NEURON {
    SUFFIX pas
    NONSPECIFIC_CURRENT i
    RANGE g, e
}

PARAMETER {
    g = .001 (S/cm2) <0,1e9>
    e = -70 (mV)
}

ASSIGNED {v (mV) i (mA/cm2)}

BREAKPOINT {
    i = g*(v - e)
}

================================================
FILE: coreneuron/mechanism/mech/modfile/pattern.mod
================================================

: The spikeout pairs (t, gid) resulting from a parallel network simulation
: can become the stimulus for any single cpu subnet as long as the gid's are
: consistent.
: Note: hoc must retain references to the tvec and gidvec vectors
: to prevent the Info from going out of existence

NEURON {
    ARTIFICIAL_CELL PatternStim
    RANGE fake_output
    THREADSAFE
    BBCOREPOINTER ptr
}

PARAMETER {
    fake_output = 0
}

ASSIGNED {
    ptr
}

: Start the event chain only when a spike pattern has been installed.
INITIAL {
    if (initps() > 0) { net_send(0, 1) }
}

: flag 1 is the self event: deliver the next group of spikes and, if
: sendgroup() returns a time that is not in the past, reschedule for it.
NET_RECEIVE (w) {LOCAL nst
    if (flag == 1) {
        nst = sendgroup()
        if (nst >= t) {net_send(nst - t, 1)}
    }
}

VERBATIM
/* Spike pattern: `size` (time, gid) pairs and the index of the next pair to send. */
struct Info {
    int size;
    double* tvec;
    int* gidvec;
    int index;
};

/* Declares `ip`, a pointer to this instance's Info* slot (the BBCOREPOINTER). */
#define INFOCAST Info** ip = (Info**)(&(_p_ptr))

ENDVERBATIM

VERBATIM
/* Allocate an empty Info (no vectors, index 0). The caller stores it in *ip. */
Info* mkinfo(_threadargsproto_) {
    INFOCAST;
    Info* info = (Info*)hoc_Emalloc(sizeof(Info)); hoc_malchk();
    info->size = 0;
    info->tvec = nullptr;
    info->gidvec = nullptr;
    info->index = 0;
    return info;
}
/* for CoreNEURON checkpoint save and restore */
namespace coreneuron {
/* Returns the index of the next pattern entry to be delivered. */
int checkpoint_save_patternstim(_threadargsproto_) {
    INFOCAST; Info* info = *ip;
    return info->index;
}
/* Restores the pattern position and re-queues the self event at time _te. */
void checkpoint_restore_patternstim(int _index, double _te, _threadargsproto_) {
    INFOCAST; Info* info = *ip;
    info->index = _index;
    artcell_net_send(_tqitem, -1, (Point_process*)_nt->_vdata[_ppvar[1*_STRIDE]], _te, 1.0);
}
} //namespace coreneuron
ENDVERBATIM

: Rewind the pattern to its first entry.
: Returns 1 when an Info with a time vector is installed, 0 otherwise.
FUNCTION initps() {
VERBATIM
{
    INFOCAST; Info* info = *ip;
    /* The null test used to come AFTER info->index was written, so a missing
       Info was dereferenced before being checked. Guard the write as well. */
    if (info) {
        info->index = 0;
    }
    if (info && info->tvec) {
        _linitps = 1.;
    }else{
        _linitps = 0.;
    }
}
ENDVERBATIM
}

: Deliver pattern spikes starting at the current index.
: At least 102 entries are sent per call; after that the loop stops as soon as
: the next entry lies in the future. Returns the time of the next pending entry,
: or t - 1 when the pattern is exhausted, so NET_RECEIVE stops rescheduling.
FUNCTION sendgroup() {
VERBATIM
{
    INFOCAST; Info* info = *ip;
    int size = info->size;
    int fake_out;
    double* tvec = info->tvec;
    int* gidvec = info->gidvec;
    int i;
    fake_out = fake_output ? 1 : 0;
    for (i=0; info->index < size; ++i) {
        /* only if the gid is NOT on this machine */
        nrn_fake_fire(gidvec[info->index], tvec[info->index], fake_out);
        ++info->index;
        /* index may now equal size: check the bound before peeking at the
           next time, otherwise tvec[size] is read past the end of the vector */
        if (i > 100 && info->index < size && t < tvec[info->index]) { break; }
    }
    if (info->index >= size) {
        _lsendgroup = t - 1.;
    }else{
        _lsendgroup = tvec[info->index];
    }
}
ENDVERBATIM
}

VERBATIM
/* Nothing is transferred through the bbcore buffers; see pattern_stim_info_ref. */
static void bbcore_write(double* x, int* d, int* xx, int *offset, _threadargsproto_){}
static void bbcore_read(double* x, int* d, int* xx, int* offset, _threadargsproto_){}

namespace coreneuron {
/* Install a fresh Info that refers to the caller's vectors (they are not
   copied) and queue the first self event. */
void pattern_stim_setup_helper(int size, double* tv, int* gv, _threadargsproto_) {
    INFOCAST;
    Info* info = mkinfo(_threadargs_);
    *ip = info;
    info->size = size;
    info->tvec = tv;
    info->gidvec = gv;
    // initiate event chain (needed in case of restore)
    artcell_net_send ( _tqitem, -1, (Point_process*) _nt->_vdata[_ppvar[1*_STRIDE]], t +  0.0 , 1.0 ) ;
}

Info** pattern_stim_info_ref(_threadargsproto_) {
    // Info shared with NEURON.
    // So nrn <-> corenrn needs no actual transfer for direct mode psolve.
    INFOCAST;
    return ip; // Caller sets *ip to NEURON's PatternStim Info*
}
} // namespace coreneuron
ENDVERBATIM

================================================
FILE: coreneuron/mechanism/mech/modfile/stim.mod
================================================

COMMENT
Since this is an electrode current, positive values of i depolarize the cell
and in the presence of the extracellular mechanism there will be a change
in vext since i is not a transmembrane current but a current injected
directly to the inside of the cell.
ENDCOMMENT NEURON { POINT_PROCESS IClamp RANGE del, dur, amp, i ELECTRODE_CURRENT i } UNITS { (nA) = (nanoamp) } PARAMETER { del (ms) dur (ms) <0,1e9> amp (nA) } ASSIGNED { i (nA) } INITIAL { i = 0 } BREAKPOINT { : for fixed step methods, we can ignore at_time, which was introduced for variable timestep and will be deprecated anyway. : at_time(del) : at_time(del+dur) if (t < del + dur && t >= del) { i = amp }else{ i = 0 } } ================================================ FILE: coreneuron/mechanism/mech/modfile/svclmp.mod ================================================ TITLE svclmp.mod COMMENT Single electrode Voltage clamp with three levels. Clamp is on at time 0, and off at time dur1+dur2+dur3. When clamp is off the injected current is 0. The clamp levels are amp1, amp2, amp3. (i is the injected current, vc measures the control voltage.) Do not insert several instances of this model at the same location in order to make level changes. That is equivalent to independent clamps and they will have incompatible internal state values. The electrical circuit for the clamp is exceedingly simple: vc ---'\/\/`--- cell rs Note that since this is an electrode current model v refers to the internal potential which is equivalent to the membrane potential v when there is no extracellular membrane mechanism present but is v+vext when one is present. Also since i is an electrode current, positive values of i depolarize the cell.
(Normally, positive membrane currents are outward and thus hyperpolarize the cell) ENDCOMMENT INDEPENDENT {t FROM 0 TO 1 WITH 1 (ms)} DEFINE NSTEP 3 NEURON { POINT_PROCESS SEClamp ELECTRODE_CURRENT i RANGE dur1, amp1, dur2, amp2, dur3, amp3, rs, vc, i } UNITS { (nA) = (nanoamp) (mV) = (millivolt) (uS) = (microsiemens) } PARAMETER { rs = 1 (megohm) <1e-9, 1e9> dur1 (ms) amp1 (mV) dur2 (ms) <0,1e9> amp2 (mV) dur3 (ms) <0,1e9> amp3 (mV) } ASSIGNED { v (mV) : automatically v + vext when extracellular is present i (nA) vc (mV) tc2 (ms) tc3 (ms) on } INITIAL { tc2 = dur1 + dur2 tc3 = tc2 + dur3 on = 0 } BREAKPOINT { SOLVE icur METHOD after_cvode vstim() } PROCEDURE icur() { if (on) { i = (vc - v)/rs }else{ i = 0 } } COMMENT The SOLVE of icur() in the BREAKPOINT block is necessary to compute i=(vc - v(t))/rs instead of i=(vc - v(t-dt))/rs This is important for time varying vc because the actual i used in the implicit method is equivalent to (vc - v(t)/rs due to the calculation of di/dv from the BREAKPOINT block. The reason this works is because the SOLVE statement in the BREAKPOINT block is executed after the membrane potential is advanced. It is a shame that vstim has to be called twice but putting the call in a SOLVE block would cause playing a Vector into vc to be off by one time step. ENDCOMMENT PROCEDURE vstim() { on = 1 if (dur1) {at_time(dur1)} if (dur2) {at_time(tc2)} if (dur3) {at_time(tc3)} if (t < dur1) { vc = amp1 }else if (t < tc2) { vc = amp2 }else if (t < tc3) { vc = amp3 }else { vc = 0 on = 0 } icur() } ================================================ FILE: coreneuron/mechanism/mech_mapping.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include #include #include "coreneuron/mechanism/mech_mapping.hpp" #include "coreneuron/mechanism/mechanism.hpp" #include "coreneuron/permute/data_layout.hpp" namespace coreneuron { using Offset = size_t; using MechId = int; using VariableName = const char*; struct cmp_str { bool operator()(char const* a, char const* b) const { return std::strcmp(a, b) < 0; } }; /* * Structure that map variable names of mechanisms to their value's location (offset) in memory */ using MechNamesMapping = std::map>; static MechNamesMapping mechNamesMapping; static void set_an_offset(int mech_id, const char* variable_name, int offset) { mechNamesMapping[mech_id][variable_name] = offset; } double* get_var_location_from_var_name(int mech_id, const char* variable_name, Memb_list* ml, int node_index) { if (mechNamesMapping.find(mech_id) == mechNamesMapping.end()) { std::cerr << "ERROR : no variable name mapping exist for mechanism id: " << mech_id << std::endl; abort(); } if (mechNamesMapping.at(mech_id).find(variable_name) == mechNamesMapping.at(mech_id).end()) { std::cerr << "ERROR : no value associtated to variable name: " << variable_name << std::endl; abort(); } int variable_rank = mechNamesMapping.at(mech_id).at(variable_name); int ix = get_data_index(node_index, variable_rank, mech_id, ml); return &(ml->data[ix]); } void register_all_variables_offsets(int mech_id, SerializedNames variable_names) { int idx = 0; int nb_parsed_variables = 0; int current_categorie = 1; while (current_categorie < NB_MECH_VAR_CATEGORIES) { if (variable_names[idx]) { set_an_offset(mech_id, variable_names[idx], nb_parsed_variables); nb_parsed_variables++; } else { current_categorie++; } idx++; } idx++; } } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/mech_mapping.hpp ================================================ /* # 
============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once /* * todo : currently mod2c has exactly 4 different variable categories * that are registered to coreneuron. */ #define NB_MECH_VAR_CATEGORIES 4 /* * SerializedNames * * names are passed serialized using the following format: * SerializedNames : {"0",[[<name>,]*0,]* [[<name>,]* 0]} * All categories must be filled; if they are empty, just another 0 follows. * * ex: {"0", "name1", "name2", 0, "name3", "name4", 0,0,0} * This means the first category with names {name1,name2}, * the second category with {name3, name4}, 2 last categories are empty */ namespace coreneuron { struct Memb_list; using SerializedNames = const char**; // return pointer to value of a variable's mechanism; the implementation aborts when the mechanism id or the variable name is not registered extern double* get_var_location_from_var_name(int mech_id, const char* variable_name, Memb_list* ml, int local_index); // initialize mapping of variable names of mechanism, to their places in memory extern void register_all_variables_offsets(int mech_id, SerializedNames variable_names); } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/mechanism.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details.
# ============================================================================= */ #pragma once #include #include "coreneuron/nrnconf.h" #include "coreneuron/utils/memory.h" namespace coreneuron { // OpenACC with PGI compiler has issue when union is used and hence use struct // \todo check if newer PGI versions has resolved this issue #if defined(_OPENACC) struct ThreadDatum { int i; double* pval; void* _pvoid; }; #else union ThreadDatum { double val; int i; double* pval; void* _pvoid; }; #endif /* will go away at some point */ struct Point_process { int _i_instance; short _type; short _tid; /* NrnThread id */ }; struct NetReceiveBuffer_t { int* _displ; /* _displ_cnt + 1 of these */ int* _nrb_index; /* _cnt of these (order of increasing _pnt_index) */ int* _pnt_index; int* _weight_index; double* _nrb_t; double* _nrb_flag; int _cnt; int _displ_cnt; /* number of unique _pnt_index */ int _size; /* capacity */ int _pnt_offset; size_t size_of_object() { size_t nbytes = 0; nbytes += _size * sizeof(int) * 3; nbytes += (_size + 1) * sizeof(int); nbytes += _size * sizeof(double) * 2; return nbytes; } }; struct NetSendBuffer_t: MemoryManaged { int* _sendtype; // net_send, net_event, net_move int* _vdata_index; int* _pnt_index; int* _weight_index; double* _nsb_t; double* _nsb_flag; int _cnt; int _size; /* capacity */ int reallocated; /* if buffer resized/reallocated, needs to be copy to cpu */ NetSendBuffer_t(int size) : _size(size) { _cnt = 0; _sendtype = (int*) ecalloc_align(_size, sizeof(int)); _vdata_index = (int*) ecalloc_align(_size, sizeof(int)); _pnt_index = (int*) ecalloc_align(_size, sizeof(int)); _weight_index = (int*) ecalloc_align(_size, sizeof(int)); // when == 1, NetReceiveBuffer_t is newly allocated (i.e. 
we need to free previous copy // and recopy new data reallocated = 1; _nsb_t = (double*) ecalloc_align(_size, sizeof(double)); _nsb_flag = (double*) ecalloc_align(_size, sizeof(double)); } size_t size_of_object() { size_t nbytes = 0; nbytes += _size * sizeof(int) * 4; nbytes += _size * sizeof(double) * 2; return nbytes; } ~NetSendBuffer_t() { free_memory(_sendtype); free_memory(_vdata_index); free_memory(_pnt_index); free_memory(_weight_index); free_memory(_nsb_t); free_memory(_nsb_flag); } void grow() { #ifdef CORENEURON_ENABLE_GPU int cannot_reallocate_on_device = 0; assert(cannot_reallocate_on_device); #else int new_size = _size * 2; grow_buf(&_sendtype, _size, new_size); grow_buf(&_vdata_index, _size, new_size); grow_buf(&_pnt_index, _size, new_size); grow_buf(&_weight_index, _size, new_size); grow_buf(&_nsb_t, _size, new_size); grow_buf(&_nsb_flag, _size, new_size); _size = new_size; #endif } private: template void grow_buf(T** buf, int size, int new_size) { T* new_buf = nullptr; new_buf = (T*) ecalloc_align(new_size, sizeof(T)); memcpy(new_buf, *buf, size * sizeof(T)); free(*buf); *buf = new_buf; } }; struct Memb_list { /* nodeindices contains all nodes this extension is responsible for, * ordered according to the matrix. 
This allows to access the matrix * directly via the nrn_actual_* arrays instead of accessing it in the * order of insertion and via the node-structure, making it more * cache-efficient */ int* nodeindices = nullptr; int* _permute = nullptr; double* data = nullptr; Datum* pdata = nullptr; ThreadDatum* _thread = nullptr; /* thread specific data (when static is no good) */ NetReceiveBuffer_t* _net_receive_buffer = nullptr; NetSendBuffer_t* _net_send_buffer = nullptr; int nodecount; /* actual node count */ int _nodecount_padded; void* instance{nullptr}; /* mechanism instance struct */ // nrn_acc_manager.cpp handles data movement to/from the accelerator as the // "private constructor" in the translated MOD file code is called before // the main nrn_acc_manager methods that copy thread/mechanism data to the // device void* global_variables{nullptr}; std::size_t global_variables_size{}; }; } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/membfunc.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include "coreneuron/mechanism/mechanism.hpp" #include "coreneuron/utils/offload.hpp" #include "coreneuron/utils/units.hpp" #include #include namespace coreneuron { using Pfrpdat = Datum* (*) (void); struct NrnThread; using mod_alloc_t = void (*)(double*, Datum*, int); using mod_f_t = void (*)(NrnThread*, Memb_list*, int); using pnt_receive_t = void (*)(Point_process*, int, double); using thread_table_check_t = void (*)(int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, int); /* * Memb_func structure contains all related information of a mechanism */ struct Memb_func { mod_alloc_t alloc; mod_f_t current; mod_f_t jacob; mod_f_t state; mod_f_t initialize; mod_f_t constructor; mod_f_t destructor; /* only for point processes */ // These are used for CoreNEURON-internal allocation/cleanup; they are kept // separate from the CONSTRUCTOR/DESTRUCTOR functions just above (one of // which is apparently only for point processes) for simplicity. mod_f_t private_constructor; mod_f_t private_destructor; Symbol* sym; int vectorized; int thread_size_; /* how many Datum needed in Memb_list if vectorized */ void (*thread_mem_init_)(ThreadDatum*); /* after Memb_list._thread is allocated */ void (*thread_cleanup_)(ThreadDatum*); /* before Memb_list._thread is freed */ thread_table_check_t thread_table_check_; int is_point; void (*setdata_)(double*, Datum*); int* dparam_semantics; /* for nrncore writing.
*/ ~Memb_func(); }; #define VINDEX -1 #define CABLESECTION 1 #define MORPHOLOGY 2 #define CAP 3 #define EXTRACELL 5 #define nrnocCONST 1 #define DEP 2 #define STATE 3 /*See init.c and cabvars.h for order of nrnocCONST, DEP, and STATE */ #define BEFORE_INITIAL 0 #define AFTER_INITIAL 1 #define BEFORE_BREAKPOINT 2 #define AFTER_SOLVE 3 #define BEFORE_STEP 4 #define BEFORE_AFTER_SIZE 5 /* 1 more than the previous */ struct BAMech { mod_f_t f; int type; struct BAMech* next; }; extern int nrn_ion_global_map_size; extern double** nrn_ion_global_map; extern const int ion_global_map_member_size; #define NRNPOINTER \ 4 /* added on to list of mechanism variables. These are \ pointers which connect variables from other mechanisms via the _ppval array. \ */ #define _AMBIGUOUS 5 extern int nrn_get_mechtype(const char*); extern const char* nrn_get_mechname(int); // slow. use memb_func[i].sym if possible extern int register_mech(const char** m, mod_alloc_t alloc, mod_f_t cur, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, mod_f_t private_constructor, mod_f_t private_destructor, int nrnpointerindex, int vectorized); extern int point_register_mech(const char**, mod_alloc_t alloc, mod_f_t cur, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, mod_f_t private_constructor, mod_f_t private_destructor, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, int vectorized); extern void register_constructor(mod_f_t constructor); using NetBufReceive_t = void (*)(NrnThread*); extern void hoc_register_net_receive_buffering(NetBufReceive_t, int); extern void hoc_register_net_send_buffering(int); using nrn_watch_check_t = void (*)(NrnThread*, Memb_list*); extern void hoc_register_watch_check(nrn_watch_check_t, int); extern void nrn_jacob_capacitance(NrnThread*, Memb_list*, int); extern void nrn_writes_conc(int, int); /* ktf(celsius) evaluates 1000 * units::gasconstant * (celsius + 273.15) / units::faraday */ constexpr double ktf(double celsius) { return 1000.
* units::gasconstant * (celsius + 273.15) / units::faraday; } // std::log isn't constexpr, but there are argument values for which nrn_nernst // is a constant expression /* returns 0 when z == 0, 1e6 when ci <= 0, -1e6 when co <= 0, otherwise ktf(celsius) / z * log(co / ci) */ constexpr double nrn_nernst(double ci, double co, double z, double celsius) { if (z == 0) { return 0.; } if (ci <= 0.) { return 1e6; } else if (co <= 0.) { return -1e6; } else { return ktf(celsius) / z * std::log(co / ci); } } /* when bit 040 of `it` is set, stores nrn_nernst(pe[1 * STRIDE], pe[2 * STRIDE], gimap[type][2], celsius) into pe[0], where pe = p1 - p2 * STRIDE */ constexpr void nrn_wrote_conc(int type, double* p1, int p2, int it, double** gimap, double celsius, int _cntml_padded) { if (it & 040) { constexpr int _iml = 0; int const STRIDE{_cntml_padded + _iml}; /* passing _nt to this function causes cray compiler to segfault during compilation * hence passing _cntml_padded */ double* pe = p1 - p2 * STRIDE; pe[0] = nrn_nernst(pe[1 * STRIDE], pe[2 * STRIDE], gimap[type][2], celsius); } } /* efun(x) is x / (exp(x) - 1), replaced by 1 - x / 2 for |x| < 1e-4 where the quotient would approach 0 / 0 */ inline double nrn_ghk(double v, double ci, double co, double z, double celsius) { auto const efun = [](double x) { if (std::abs(x) < 1e-4) { return 1. - x / 2.; } else { return x / (std::exp(x) - 1.); } }; double const temp{z * v / ktf(celsius)}; double const eco{co * efun(+temp)}; double const eci{ci * efun(-temp)}; return .001 * z * units::faraday * (eci - eco); } extern void hoc_register_prop_size(int, int, int); extern void hoc_register_dparam_semantics(int type, int, const char* name); extern void hoc_reg_ba(int, mod_f_t, int); struct DoubScal { const char* name; double* pdoub; }; struct DoubVec { const char* name; double* pdoub; int index1; }; struct VoidFunc { const char* name; void (*func)(void); }; extern void hoc_register_var(DoubScal*, DoubVec*, VoidFunc*); extern void _nrn_layout_reg(int, int); extern void _nrn_thread_reg0(int i, void (*f)(ThreadDatum*)); extern void _nrn_thread_reg1(int i, void (*f)(ThreadDatum*)); using bbcore_read_t = void (*)(double*, int*, int*, int*, int, int, double*, Datum*, ThreadDatum*, NrnThread*, Memb_list*, double); using bbcore_write_t = void (*)(double*, int*, int*, int*, int, int, double*, Datum*,
ThreadDatum*, NrnThread*, Memb_list*, double); extern int nrn_mech_depend(int type, int* dependencies); extern int nrn_fornetcon_cnt_; extern int* nrn_fornetcon_type_; extern int* nrn_fornetcon_index_; extern void add_nrn_fornetcons(int, int); extern void add_nrn_has_net_event(int); extern void net_event(Point_process*, double); extern void net_send(void**, int, Point_process*, double, double); extern void net_move(void**, Point_process*, double); extern void artcell_net_send(void**, int, Point_process*, double, double); extern void artcell_net_move(void**, Point_process*, double); extern void nrn2ncs_outputevent(int netcon_output_index, double firetime); extern bool nrn_use_localgid_; /* NOTE(review): net_sem_from_gpu is declared twice, here with parameter names and again below without */ extern void net_sem_from_gpu(int sendtype, int i_vdata, int, int ith, int ipnt, double, double); // _OPENACC and/or NET_RECEIVE_BUFFERING extern void net_sem_from_gpu(int, int, int, int, int, double, double); extern void hoc_malchk(void); /* just a stub */ extern void* hoc_Emalloc(size_t); } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/patternstim.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ // Want to have the classical NEURON PatternStim functionality available // in coreneuron to allow debugging and trajectory verification on // desktop single process tests. Since pattern.mod provides most of what // we need even in the coreneuron context, we placed a minimally modified // version of that in coreneuron/mechanism/mech/modfile/pattern.mod and this file // provides an interface that creates an instance of the // PatternStim ARTIFICIAL_CELL in thread 0 and attaches the spike raster // data to it.
#include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/io/output_spikes.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/coreneuron.hpp" namespace coreneuron { // from translated patstim.mod void _pattern_reg(void); // from patstim.mod extern void pattern_stim_setup_helper(int size, double* tvec, int* gidvec, int icnt, int cnt, double* _p, Datum* _ppvar, ThreadDatum* _thread, NrnThread* _nt, Memb_list* ml, double v); static size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double tstop); int nrn_extra_thread0_vdata; void nrn_set_extra_thread0_vdata() { // limited to PatternStim for now. // if called, must be called before nrn_setup and after mk_mech. int type = nrn_get_mechtype("PatternStim"); if (!corenrn.get_memb_func(type).initialize) { _pattern_reg(); } nrn_extra_thread0_vdata = corenrn.get_prop_dparam_size()[type]; } // fname is the filename of an output_spikes.h format raster file. 
// todo : add function for memory cleanup (to be called at the end of simulation) void nrn_mkPatternStim(const char* fname, double tstop) { int type = nrn_get_mechtype("PatternStim"); if (!corenrn.get_memb_func(type).sym) { printf("nrn_set_extra_thread_vdata must be called (after mk_mech, and before nrn_setup\n"); assert(0); } // if there is empty thread then return, don't need patternstim if (nrn_threads == nullptr || nrn_threads->ncell == 0) { return; } double* tvec; int* gidvec; // todo : handle when spike raster will be very large (int < size_t) size_t size = read_raster_file(fname, &tvec, &gidvec, tstop); Point_process* pnt = nrn_artcell_instantiate("PatternStim"); NrnThread* nt = nrn_threads + pnt->_tid; Memb_list* ml = nt->_ml_list[type]; int layout = corenrn.get_mech_data_layout()[type]; int sz = corenrn.get_prop_param_size()[type]; int psz = corenrn.get_prop_dparam_size()[type]; int _cntml = ml->nodecount; int _iml = pnt->_i_instance; double* _p = ml->data; Datum* _ppvar = ml->pdata; if (layout == Layout::AoS) { _p += _iml * sz; _ppvar += _iml * psz; } else if (layout == Layout::SoA) { ; } else { assert(0); } pattern_stim_setup_helper(size, tvec, gidvec, _iml, _cntml, _p, _ppvar, nullptr, nt, ml, 0.0); } size_t read_raster_file(const char* fname, double** tvec, int** gidvec, double tstop) { FILE* f = fopen(fname, "r"); nrn_assert(f); // skip first line containing "scatter" string char dummy[100]; nrn_assert(fgets(dummy, 100, f)); std::vector> spikes; spikes.reserve(10000); double stime; int gid; while (fscanf(f, "%lf %d\n", &stime, &gid) == 2) { if (stime >= t && stime <= tstop) { spikes.push_back(std::make_pair(stime, gid)); } } fclose(f); // pattern.mod expects sorted spike raster (this is to avoid // injecting all events at the begining of the simulation). 
// sort spikes according to time std::sort(spikes.begin(), spikes.end()); // fill gid and time vectors *tvec = (double*) emalloc(spikes.size() * sizeof(double)); *gidvec = (int*) emalloc(spikes.size() * sizeof(int)); for (size_t i = 0; i < spikes.size(); i++) { (*tvec)[i] = spikes[i].first; (*gidvec)[i] = spikes[i].second; } return spikes.size(); } // see nrn_setup.cpp:read_phase2 for how it creates NrnThreadMembList instances. static NrnThreadMembList* alloc_nrn_thread_memb(NrnThread* nt, int type) { NrnThreadMembList* tml = (NrnThreadMembList*) ecalloc(1, sizeof(NrnThreadMembList)); tml->dependencies = nullptr; tml->ndependencies = 0; tml->index = type; tml->next = nullptr; // fill in tml->ml info. The data is not in the cache efficient // NrnThread arrays but there should not be many of these instances. int psize = corenrn.get_prop_param_size()[type]; int dsize = corenrn.get_prop_dparam_size()[type]; int layout = corenrn.get_mech_data_layout()[type]; tml->ml = (Memb_list*) ecalloc(1, sizeof(Memb_list)); tml->ml->nodecount = 1; tml->ml->_nodecount_padded = tml->ml->nodecount; tml->ml->nodeindices = nullptr; tml->ml->data = (double*) ecalloc(tml->ml->nodecount * psize, sizeof(double)); tml->ml->pdata = (Datum*) ecalloc(nrn_soa_padded_size(tml->ml->nodecount, layout) * dsize, sizeof(Datum)); tml->ml->_thread = nullptr; tml->ml->_net_receive_buffer = nullptr; tml->ml->_net_send_buffer = nullptr; tml->ml->_permute = nullptr; if (auto* const priv_ctor = corenrn.get_memb_func(tml->index).private_constructor) { priv_ctor(nt, tml->ml, tml->index); } return tml; } // Opportunistically implemented to create a single PatternStim. // So only does enough to get that functionally incorporated into the model // and other types may require additional work. 
In particular, we // append a new NrnThreadMembList with one item to the thread 0 tml list // in order for the artificial cell to get its INITIAL block called but // we do not modify any of the other thread 0 data arrays or counts. Point_process* nrn_artcell_instantiate(const char* mechname) { int type = nrn_get_mechtype(mechname); NrnThread* nt = nrn_threads + 0; // printf("nrn_artcell_instantiate %s type=%d\n", mechname, type); // create and append to nt.tml auto tml = alloc_nrn_thread_memb(nt, type); assert(nt->_ml_list[type] == nullptr); // FIXME nt->_ml_list[type] = tml->ml; if (!nt->tml) { nt->tml = tml; } else { for (NrnThreadMembList* i = nt->tml; i; i = i->next) { if (!i->next) { i->next = tml; break; } } } // Here we have a problem with no easy general solution. ml->pdata are // integer indexes into the nt->_data nt->_idata and nt->_vdata array // depending on context, // but nrn_setup.cpp allocated these to exactly have the size needed by // the file defined model (at least for _vdata) and so there are no slots // for pdata to index into for this new instance. // So nrn_setup.cpp:phase2 needs to // be notified that some extra space will be required. For now, defer // the general situation of several instances for several types and // demand that this method is never called more than once. 
We introduce // a int nrn_extra_thread0_vdata (only that is needed by PatternStim) // which will be used by // nrn_setup.cpp:phase2 to allocate the appropriately larger // _vdata arrays for thread 0 (without changing _nvdata so // that we can fill in the indices here) static int cnt = 0; if (++cnt > 1) { printf("nrn_artcell_instantiate cannot be called more than once\n"); assert(0); } // note that PatternStim internal usage for the 4 ppvar values is: // #define _nd_area _nt->_data[_ppvar[0]] (not used since ARTIFICIAL_CELL) // #define _p_ptr _nt->_vdata[_ppvar[2]] (the BBCORE_POINTER) // #define _tqitem &(_nt->_vdata[_ppvar[3]]) (for net_send) // and general external usage is: // _nt->_vdata[_ppvar[1]] = Point_process* // Point_process* pnt = new Point_process; pnt->_type = type; pnt->_tid = nt->id; pnt->_i_instance = 0; // as though all dparam index into _vdata int dsize = corenrn.get_prop_dparam_size()[type]; assert(dsize <= nrn_extra_thread0_vdata); for (int i = 0; i < dsize; ++i) { tml->ml->pdata[i] = nt->_nvdata + i; } nt->_vdata[nt->_nvdata + 1] = (void*) pnt; return pnt; } } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/register_mech.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/membrane_definitions.h" #include "coreneuron/mechanism/eion.hpp" #include "coreneuron/mechanism/mech_mapping.hpp" #include "coreneuron/mechanism/membfunc.hpp" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" namespace coreneuron { int secondorder = 0; double t, dt, celsius, pi; int rev_dt; using Pfrv = void (*)(); static void ion_write_depend(int type, int etype); void hoc_reg_bbcore_read(int type, bbcore_read_t f) { if (type == -1) { return; } corenrn.get_bbcore_read()[type] = f; } void hoc_reg_bbcore_write(int type, bbcore_write_t f) { if (type == -1) { return; } corenrn.get_bbcore_write()[type] = f; } void add_nrn_has_net_event(int type) { if (type == -1) { return; } corenrn.get_has_net_event().push_back(type); } /* values are type numbers of mechanisms which have FOR_NETCONS statement */ int nrn_fornetcon_cnt_; /* how many models have a FOR_NETCONS statement */ int* nrn_fornetcon_type_; /* what are the type numbers */ int* nrn_fornetcon_index_; /* what is the index into the ppvar array */ void add_nrn_fornetcons(int type, int indx) { if (type == -1) return; int i = nrn_fornetcon_cnt_++; nrn_fornetcon_type_ = (int*) erealloc(nrn_fornetcon_type_, (i + 1) * sizeof(int)); nrn_fornetcon_index_ = (int*) erealloc(nrn_fornetcon_index_, (i + 1) * sizeof(int)); nrn_fornetcon_type_[i] = type; nrn_fornetcon_index_[i] = indx; } void add_nrn_artcell(int type, int qi) { if (type == -1) { return; } corenrn.get_is_artificial()[type] = 1; corenrn.get_artcell_qindex()[type] = qi; } void set_pnt_receive(int type, pnt_receive_t pnt_receive, pnt_receive_t pnt_receive_init, short size) { if (type == -1) { return; } corenrn.get_pnt_receive()[type] = pnt_receive; corenrn.get_pnt_receive_init()[type] = pnt_receive_init; corenrn.get_pnt_receive_size()[type] = size; } void alloc_mech(int memb_func_size_) { corenrn.get_memb_funcs().resize(memb_func_size_); 
corenrn.get_pnt_map().resize(memb_func_size_); corenrn.get_pnt_receive().resize(memb_func_size_); corenrn.get_pnt_receive_init().resize(memb_func_size_); corenrn.get_pnt_receive_size().resize(memb_func_size_); corenrn.get_watch_check().resize(memb_func_size_); corenrn.get_is_artificial().resize(memb_func_size_, false); corenrn.get_artcell_qindex().resize(memb_func_size_); corenrn.get_prop_param_size().resize(memb_func_size_); corenrn.get_prop_dparam_size().resize(memb_func_size_); corenrn.get_mech_data_layout().resize(memb_func_size_, 1); corenrn.get_bbcore_read().resize(memb_func_size_); corenrn.get_bbcore_write().resize(memb_func_size_); } void initnrn() { secondorder = DEF_secondorder; /* >0 means crank-nicolson. 2 means currents adjusted to t+dt/2 */ t = 0.; /* msec */ dt = DEF_dt; /* msec */ rev_dt = (int) (DEF_rev_dt); /* 1/msec */ celsius = DEF_celsius; /* degrees celsius */ } /* if vectorized then thread_data_size added to it */ int register_mech(const char** m, mod_alloc_t alloc, mod_f_t cur, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, mod_f_t private_constructor, mod_f_t private_destructor, int /* nrnpointerindex */, int vectorized) { auto& memb_func = corenrn.get_memb_funcs(); int type = nrn_get_mechtype(m[1]); // No mechanism in the .dat files if (type == -1) return type; assert(type); #ifdef DEBUG printf("register_mech %s %d\n", m[1], type); #endif if (memb_func[type].sym) { assert(strcmp(memb_func[type].sym, m[1]) == 0); } else { memb_func[type].sym = (char*) emalloc(strlen(m[1]) + 1); strcpy(memb_func[type].sym, m[1]); } memb_func[type].current = cur; memb_func[type].jacob = jacob; memb_func[type].alloc = alloc; memb_func[type].state = stat; memb_func[type].initialize = initialize; memb_func[type].constructor = nullptr; memb_func[type].destructor = nullptr; memb_func[type].private_constructor = private_constructor; memb_func[type].private_destructor = private_destructor; #if VECTORIZE memb_func[type].vectorized = vectorized ? 
1 : 0; memb_func[type].thread_size_ = vectorized ? (vectorized - 1) : 0; memb_func[type].thread_mem_init_ = nullptr; memb_func[type].thread_cleanup_ = nullptr; memb_func[type].thread_table_check_ = nullptr; memb_func[type].is_point = 0; memb_func[type].setdata_ = nullptr; memb_func[type].dparam_semantics = nullptr; #endif register_all_variables_offsets(type, &m[2]); return type; } void nrn_writes_conc(int type, int /* unused */) { static int lastion = EXTRACELL + 1; if (type == -1) return; #if CORENRN_DEBUG printf("%s reordered from %d to %d\n", corenrn.get_memb_func(type).sym, type, lastion); #endif if (nrn_is_ion(type)) { ++lastion; } } void _nrn_layout_reg(int type, int layout) { corenrn.get_mech_data_layout()[type] = layout; } void hoc_register_net_receive_buffering(NetBufReceive_t f, int type) { corenrn.get_net_buf_receive().emplace_back(f, type); } void hoc_register_net_send_buffering(int type) { corenrn.get_net_buf_send_type().push_back(type); } void hoc_register_watch_check(nrn_watch_check_t nwc, int type) { corenrn.get_watch_check()[type] = nwc; } void hoc_register_prop_size(int type, int psize, int dpsize) { if (type == -1) return; int pold = corenrn.get_prop_param_size()[type]; int dpold = corenrn.get_prop_dparam_size()[type]; if (psize != pold || dpsize != dpold) { corenrn.get_different_mechanism_type().push_back(type); } corenrn.get_prop_param_size()[type] = psize; corenrn.get_prop_dparam_size()[type] = dpsize; if (dpsize) { corenrn.get_memb_func(type).dparam_semantics = (int*) ecalloc(dpsize, sizeof(int)); } } void hoc_register_dparam_semantics(int type, int ix, const char* name) { /* needed for SoA to possibly reorder name_ion and some "pointer" pointers. 
*/ /* only interested in area, iontype, cvode_ieq, netsend, pointer, pntproc, bbcorepointer, watch, diam, fornetcon xx_ion and #xx_ion which will get a semantics value of -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, type, and type+1000 respectively */ auto& memb_func = corenrn.get_memb_funcs(); if (strcmp(name, "area") == 0) { memb_func[type].dparam_semantics[ix] = -1; } else if (strcmp(name, "iontype") == 0) { memb_func[type].dparam_semantics[ix] = -2; } else if (strcmp(name, "cvodeieq") == 0) { memb_func[type].dparam_semantics[ix] = -3; } else if (strcmp(name, "netsend") == 0) { memb_func[type].dparam_semantics[ix] = -4; } else if (strcmp(name, "pointer") == 0) { memb_func[type].dparam_semantics[ix] = -5; } else if (strcmp(name, "pntproc") == 0) { memb_func[type].dparam_semantics[ix] = -6; } else if (strcmp(name, "bbcorepointer") == 0) { memb_func[type].dparam_semantics[ix] = -7; } else if (strcmp(name, "watch") == 0) { memb_func[type].dparam_semantics[ix] = -8; } else if (strcmp(name, "diam") == 0) { memb_func[type].dparam_semantics[ix] = -9; } else if (strcmp(name, "fornetcon") == 0) { memb_func[type].dparam_semantics[ix] = -10; } else { int i = name[0] == '#' ? 
1 : 0; int etype = nrn_get_mechtype(name + i); memb_func[type].dparam_semantics[ix] = etype + i * 1000; /* note that if style is needed (i==1), then we are writing a concentration */ if (i) { ion_write_depend(type, etype); } } #if CORENRN_DEBUG printf("dparam semantics %s ix=%d %s %d\n", memb_func[type].sym, ix, name, memb_func[type].dparam_semantics[ix]); #endif } /* only ion type ion_write_depend_ are non-nullptr */ /* and those are array of integers with first integer being array size */ /* and remaining size-1 integers containing the mechanism types that write concentrations to that * ion */ static void ion_write_depend(int type, int etype) { auto& memb_func = corenrn.get_memb_funcs(); auto& ion_write_depend_ = corenrn.get_ion_write_dependency(); if (ion_write_depend_.size() < memb_func.size()) { ion_write_depend_.resize(memb_func.size()); } int size = !ion_write_depend_[etype].empty() ? ion_write_depend_[etype][0] + 1 : 2; ion_write_depend_[etype].resize(size, 0); ion_write_depend_[etype][0] = size; ion_write_depend_[etype][size - 1] = type; } static int depend_append(int idep, int* dependencies, int deptype, int type) { /* append only if not already in dependencies and != type*/ bool add = true; if (deptype == type) { return idep; } for (int i = 0; i < idep; ++i) { if (deptype == dependencies[i]) { add = false; break; } } if (add) { dependencies[idep++] = deptype; } return idep; } /* return list of types that this type depends on (10 should be more than enough) */ /* dependencies must be an array that is large enough to hold that array */ /* number of dependencies is returned */ int nrn_mech_depend(int type, int* dependencies) { int dpsize = corenrn.get_prop_dparam_size()[type]; int* ds = corenrn.get_memb_func(type).dparam_semantics; int idep = 0; if (ds) for (int i = 0; i < dpsize; ++i) { if (ds[i] > 0 && ds[i] < 1000) { int deptype = ds[i]; int idepnew = depend_append(idep, dependencies, deptype, type); if ((idepnew > idep) && 
!corenrn.get_ion_write_dependency().empty() && !corenrn.get_ion_write_dependency()[deptype].empty()) { auto& iwd = corenrn.get_ion_write_dependency()[deptype]; int size = iwd[0]; for (int j = 1; j < size; ++j) { idepnew = depend_append(idepnew, dependencies, iwd[j], type); } } idep = idepnew; } } return idep; } void register_constructor(mod_f_t c) { corenrn.get_memb_funcs().back().constructor = c; } void register_destructor(mod_f_t d) { corenrn.get_memb_funcs().back().destructor = d; } int point_reg_helper(const Symbol* s2) { static int next_pointtype = 1; /* starts at 1 since 0 means not point in pnt_map */ int type = nrn_get_mechtype(s2); // No mechanism in the .dat files if (type == -1) return type; corenrn.get_pnt_map()[type] = next_pointtype++; corenrn.get_memb_func(type).is_point = 1; return corenrn.get_pnt_map()[type]; } int point_register_mech(const char** m, mod_alloc_t alloc, mod_f_t cur, mod_f_t jacob, mod_f_t stat, mod_f_t initialize, mod_f_t private_constructor, mod_f_t private_destructor, int nrnpointerindex, mod_f_t constructor, mod_f_t destructor, int vectorized) { const Symbol* s = m[1]; register_mech(m, alloc, cur, jacob, stat, initialize, private_constructor, private_destructor, nrnpointerindex, vectorized); register_constructor(constructor); register_destructor(destructor); return point_reg_helper(s); } void _modl_cleanup() {} int state_discon_allowed_; int state_discon_flag_ = 0; void state_discontinuity(int /* i */, double* pd, double d) { if (state_discon_allowed_ && state_discon_flag_ == 0) { *pd = d; /*printf("state_discontinuity t=%g pd=%lx d=%g\n", t, (long)pd, d);*/ } } void hoc_reg_ba(int mt, mod_f_t f, int type) { if (type == -1) return; switch (type) { /* see bablk in src/nmodl/nocpout.c */ case 11: type = BEFORE_BREAKPOINT; break; case 22: type = AFTER_SOLVE; break; case 13: type = BEFORE_INITIAL; break; case 23: type = AFTER_INITIAL; break; case 14: type = BEFORE_STEP; break; default: printf("before-after processing type %d for %s 
not implemented\n", type, corenrn.get_memb_func(mt).sym); nrn_exit(1); } auto bam = (BAMech*) emalloc(sizeof(BAMech)); bam->f = f; bam->type = mt; bam->next = nullptr; // keep in call order if (!corenrn.get_bamech()[type]) { corenrn.get_bamech()[type] = bam; } else { BAMech* last; for (last = corenrn.get_bamech()[type]; last->next; last = last->next) { } last->next = bam; } } void _nrn_thread_reg0(int i, void (*f)(ThreadDatum*)) { if (i == -1) return; corenrn.get_memb_func(i).thread_cleanup_ = f; } void _nrn_thread_reg1(int i, void (*f)(ThreadDatum*)) { if (i == -1) return; corenrn.get_memb_func(i).thread_mem_init_ = f; } void _nrn_thread_table_reg(int i, thread_table_check_t f) { if (i == -1) return; corenrn.get_memb_func(i).thread_table_check_ = f; } void _nrn_setdata_reg(int i, void (*call)(double*, Datum*)) { if (i == -1) return; corenrn.get_memb_func(i).setdata_ = call; } Memb_func::~Memb_func() { if (sym != nullptr) { free(sym); } if (dparam_semantics != nullptr) { free(dparam_semantics); } } } // namespace coreneuron ================================================ FILE: coreneuron/mechanism/register_mech.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
/* /local/src/master/nrn/src/nrnoc/membdef.h,v 1.2 1995/02/13 20:20:42 hines Exp */

/*
 * Default values of membrane and simulation parameters.
 *
 * Every macro whose replacement text is an expression is fully parenthesized
 * so that it keeps its value when it is embedded in a larger expression
 * (e.g. `x / DEF_rev_dt`).
 */

/* numerical parameters */
#define DEF_nseg       1               /* default number of segments per section*/
#define DEF_dt         .025            /* ms */
#define DEF_rev_dt     (1. / DEF_dt)   /* 1/ms */
#define DEF_secondorder 0 /* >0 means crank-nicolson. 2 means current adjusted to t+dt/2 */

/*global parameters */
#define DEF_Ra      35.4 /* ohm-cm */ /*changed from 34.5 on 1/6/95*/
#define DEF_celsius 6.3  /* deg-C */

#define DEF_vrest -65. /* mV */

/* old point process parameters */
/* fclamp */
#define DEF_clamp_resist 1e-3 /* megohm */

/* Parameters that are used in mechanism _alloc() procedures */
/* cable */
#define DEF_L          100. /* microns */
#define DEF_rallbranch 1.

/* morphology */
#define DEF_diam 500. /* microns */

/* capacitance */
#define DEF_cm 1. /* uF/cm^2 */

/* fast passive (e_p and g_p)*/
#define DEF_e DEF_vrest /* mV */
#define DEF_g 5.e-4     /* S/cm^2 */

/* na_ion */
#define DEF_nai 10.                  /* mM */
#define DEF_nao 140.                 /* mM */
#define DEF_ena (115. + DEF_vrest)   /* mV */

/* k_ion */
#define DEF_ki 54.4                  /* mM */
#define DEF_ko 2.5                   /* mM */
#define DEF_ek (-12. + DEF_vrest)    /* mV */

/* ca_ion -> any program that uses DEF_eca must include <math.h> */
#define DEF_cai 5.e-5 /* mM */
#define DEF_cao 2.    /* mM */
#include <math.h>
#define DEF_eca (12.5 * log(DEF_cao / DEF_cai)) /* mV */

/* default ion values */
#define DEF_ioni 1. /* mM */
#define DEF_iono 1. /* mM */
#define DEF_eion 0. /* mV */
*/ #include "../nrnmpi.h" namespace coreneuron { /* from nrnmpi.cpp */ mpi_function nrnmpi_init{"nrnmpi_init_impl"}; mpi_function nrnmpi_finalize{ "nrnmpi_finalize_impl"}; mpi_function nrnmpi_check_threading_support{"nrnmpi_check_threading_support_impl"}; mpi_function nrnmpi_write_file{ "nrnmpi_write_file_impl"}; /* from mpispike.c */ mpi_function nrnmpi_spike_exchange{ "nrnmpi_spike_exchange_impl"}; mpi_function nrnmpi_spike_exchange_compressed{"nrnmpi_spike_exchange_compressed_impl"}; mpi_function nrnmpi_int_allmax{ "nrnmpi_int_allmax_impl"}; mpi_function nrnmpi_int_allgather{ "nrnmpi_int_allgather_impl"}; mpi_function nrnmpi_int_alltoall{ "nrnmpi_int_alltoall_impl"}; mpi_function nrnmpi_int_alltoallv{ "nrnmpi_int_alltoallv_impl"}; mpi_function nrnmpi_dbl_alltoallv{ "nrnmpi_dbl_alltoallv_impl"}; mpi_function nrnmpi_dbl_allmin{ "nrnmpi_dbl_allmin_impl"}; mpi_function nrnmpi_dbl_allmax{ "nrnmpi_dbl_allmax_impl"}; mpi_function nrnmpi_barrier{ "nrnmpi_barrier_impl"}; mpi_function nrnmpi_dbl_allreduce{ "nrnmpi_dbl_allreduce_impl"}; mpi_function nrnmpi_dbl_allreduce_vec{ "nrnmpi_dbl_allreduce_vec_impl"}; mpi_function nrnmpi_long_allreduce_vec{"nrnmpi_long_allreduce_vec_impl"}; mpi_function nrnmpi_initialized{ "nrnmpi_initialized_impl"}; mpi_function nrnmpi_abort{"nrnmpi_abort_impl"}; mpi_function nrnmpi_wtime{"nrnmpi_wtime_impl"}; mpi_function nrnmpi_local_rank{ "nrnmpi_local_rank_impl"}; mpi_function nrnmpi_local_size{ "nrnmpi_local_size_impl"}; #if NRN_MULTISEND mpi_function nrnmpi_multisend_comm{ "nrnmpi_multisend_comm_impl"}; mpi_function nrnmpi_multisend{ "nrnmpi_multisend_impl"}; mpi_function nrnmpi_multisend_single_advance{"nrnmpi_multisend_single_advance_impl"}; mpi_function nrnmpi_multisend_conserve{"nrnmpi_multisend_conserve_impl"}; #endif // NRN_MULTISEND } // namespace coreneuron ================================================ FILE: coreneuron/mpi/core/resolve.cpp ================================================ #include #include #include "../nrnmpi.h" 
namespace coreneuron { // Those functions are part of a mechanism to dynamically load mpi or not void mpi_manager_t::resolve_symbols(void* handle) { for (auto* ptr: m_function_ptrs) { assert(!(*ptr)); ptr->resolve(handle); assert(*ptr); } } void mpi_function_base::resolve(void* handle) { dlerror(); void* ptr = dlsym(handle, m_name); const char* error = dlerror(); if (error) { std::ostringstream oss; oss << "Could not get symbol " << m_name << " from handle " << handle << ": " << error; throw std::runtime_error(oss.str()); } assert(ptr); m_fptr = ptr; } } // namespace coreneuron ================================================ FILE: coreneuron/mpi/lib/mpispike.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/nrnconf.h" /* do not want the redef in the dynamic load case */ #include "coreneuron/mpi/nrnmpiuse.h" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/nrnmpidec.h" #include "nrnmpi.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/utils/nrn_assert.h" #include #include namespace coreneuron { extern MPI_Comm nrnmpi_comm; static int np; static int* displs{nullptr}; static int* byteovfl{nullptr}; /* for the compressed transfer method */ static MPI_Datatype spike_type; static void* emalloc(size_t size) { void* memptr = malloc(size); assert(memptr); return memptr; } // Register type NRNMPI_Spike void nrnmpi_spike_initialize() { NRNMPI_Spike s; int block_lengths[2] = {1, 1}; MPI_Aint addresses[3]; MPI_Get_address(&s, &addresses[0]); MPI_Get_address(&(s.gid), &addresses[1]); MPI_Get_address(&(s.spiketime), &addresses[2]); MPI_Aint displacements[2] = {addresses[1] - addresses[0], addresses[2] - addresses[0]}; MPI_Datatype typelist[2] = {MPI_INT, 
/**
 * Gather the outgoing spikes of every rank on every rank.
 *
 * The wire format is chosen at compile time through nrn_spikebuf_size:
 *  - nrn_spikebuf_size == 0: the per-rank counts are exchanged with
 *    MPI_Allgather and then all spikes with MPI_Allgatherv.
 *  - nrn_spikebuf_size  > 0: one NRNMPI_Spikebuf per rank is exchanged with
 *    MPI_Allgather; only the spikes a rank has beyond nrn_spikebuf_size go
 *    through MPI_Allgatherv.
 *
 * @param nin        [out] one entry per rank: number of elements that rank
 *                   contributes to *spikein
 * @param spikeout   spikes this rank sends through MPI_Allgatherv
 * @param icapacity  number of NRNMPI_Spike elements *spikein can hold. Passed
 *                   by value, so growth done here is not reported to the caller.
 * @param spikein    [in,out] receive buffer; must not be null. It is freed and
 *                   reallocated when it is too small.
 * @param ovfl       [out] total number of overflow spikes; only written when
 *                   nrn_spikebuf_size > 0
 * @param nout       number of spikes produced by this rank
 * @param spbufout   fixed-size send buffer (used only when nrn_spikebuf_size > 0)
 * @param spbufin    fixed-size receive buffers, one per rank (same condition)
 * @return total number of spikes reported by all ranks
 */
int nrnmpi_spike_exchange_impl(int* nin,
                               NRNMPI_Spike* spikeout,
                               int icapacity,
                               NRNMPI_Spike** spikein,
                               int& ovfl,
                               int nout,
                               NRNMPI_Spikebuf* spbufout,
                               NRNMPI_Spikebuf* spbufin) {
    nrn_assert(spikein);
    Instrumentor::phase_begin("spike-exchange");

    {
        // The barrier is timed separately from the data transfer that follows.
        Instrumentor::phase p("imbalance");
        wait_before_spike_exchange();
    }

    Instrumentor::phase_begin("communication");
    // One-time setup of the file-level displacement array (and, for the
    // buffered format, of the MPI datatype describing NRNMPI_Spikebuf).
    if (!displs) {
        np = nrnmpi_numprocs_;
        displs = (int*) emalloc(np * sizeof(int));
        displs[0] = 0;
#if nrn_spikebuf_size > 0
        make_spikebuf_type();
#endif
    }
#if nrn_spikebuf_size == 0
    // Step 1: every rank learns how many spikes each rank is about to send.
    MPI_Allgather(&nout, 1, MPI_INT, nin, 1, MPI_INT, nrnmpi_comm);
    // Prefix sum of the counts gives the receive displacements; n ends up as the total.
    int n = nin[0];
    for (int i = 1; i < np; ++i) {
        displs[i] = n;
        n += nin[i];
    }
    if (n) {
        if (icapacity < n) {
            // Grow with a little headroom; old contents are not needed.
            icapacity = n + 10;
            free(*spikein);
            *spikein = (NRNMPI_Spike*) emalloc(icapacity * sizeof(NRNMPI_Spike));
        }
        // Step 2: exchange the spikes themselves.
        MPI_Allgatherv(spikeout, nout, spike_type, *spikein, nin, displs, spike_type, nrnmpi_comm);
    }
#else
    // Step 1: exchange the fixed-size buffers; each carries its rank's spike count.
    MPI_Allgather(spbufout, 1, spikebuf_type, spbufin, 1, spikebuf_type, nrnmpi_comm);
    int novfl = 0;
    int n = spbufin[0].nspike;
    // nin[i] holds only what rank i could not fit into its fixed-size buffer.
    if (n > nrn_spikebuf_size) {
        nin[0] = n - nrn_spikebuf_size;
        novfl += nin[0];
    } else {
        nin[0] = 0;
    }
    for (int i = 1; i < np; ++i) {
        displs[i] = novfl;
        int n1 = spbufin[i].nspike;
        n += n1;
        if (n1 > nrn_spikebuf_size) {
            nin[i] = n1 - nrn_spikebuf_size;
            novfl += nin[i];
        } else {
            nin[i] = 0;
        }
    }
    if (novfl) {
        if (icapacity < novfl) {
            icapacity = novfl + 10;
            free(*spikein);
            *spikein = (NRNMPI_Spike*) emalloc(icapacity * sizeof(NRNMPI_Spike));
        }
        // Step 2: exchange the overflow only; n1 is this rank's share of it.
        int n1 = (nout > nrn_spikebuf_size) ? nout - nrn_spikebuf_size : 0;
        MPI_Allgatherv(spikeout, n1, spike_type, *spikein, nin, displs, spike_type, nrnmpi_comm);
    }
    ovfl = novfl;
#endif
    Instrumentor::phase_end("communication");
    Instrumentor::phase_end("spike-exchange");
    return n;
}
/**
 * Exchange spikes in the compressed byte format between all ranks.
 *
 * Every rank sends ag_send_size bytes through MPI_Allgather. The first two
 * bytes of each rank's segment encode its spike count as
 * `byte0 * 256 + byte1`. A rank with more than send_nspike spikes sends the
 * remaining bytes through MPI_Allgatherv.
 *
 * @param localgid_size   size term of the per-spike record: each spike occupies
 *                        (1 + localgid_size) bytes
 * @param spfixin_ovfl    [in,out] receive buffer for overflow bytes; freed and
 *                        reallocated when too small
 * @param send_nspike     number of spikes that fit in the fixed-size part
 * @param nin             [out] one entry per rank: spike count of that rank
 * @param ovfl_capacity   capacity of spfixin_ovfl in spikes. Passed by value,
 *                        so growth done here is not reported to the caller.
 * @param spikeout_fixed  send buffer; overflow bytes start at offset ag_send_size
 * @param ag_send_size    bytes sent per rank in the MPI_Allgather
 * @param spikein_fixed   receive buffer of the MPI_Allgather (one segment of
 *                        ag_send_size bytes per rank)
 * @param ovfl            [out] total number of overflow spikes over all ranks
 * @return total number of spikes over all ranks
 *
 * NOTE(review): `displs` is shared with nrnmpi_spike_exchange_impl, which only
 * calls make_spikebuf_type() while `displs` is still null. If this function
 * runs first in a build with nrn_spikebuf_size > 0, that datatype setup looks
 * like it would be skipped -- confirm against the callers.
 */
int nrnmpi_spike_exchange_compressed_impl(int localgid_size,
                                          unsigned char*& spfixin_ovfl,
                                          int send_nspike,
                                          int* nin,
                                          int ovfl_capacity,
                                          unsigned char* spikeout_fixed,
                                          int ag_send_size,
                                          unsigned char* spikein_fixed,
                                          int& ovfl) {
    // One-time setup of the file-level per-rank work arrays.
    if (!displs) {
        np = nrnmpi_numprocs_;
        displs = (int*) emalloc(np * sizeof(int));
        displs[0] = 0;
    }
    if (!byteovfl) {
        byteovfl = (int*) emalloc(np * sizeof(int));
    }

    // Fixed-size part: the same number of bytes from every rank.
    MPI_Allgather(
        spikeout_fixed, ag_send_size, MPI_BYTE, spikein_fixed, ag_send_size, MPI_BYTE, nrnmpi_comm);
    int novfl = 0;  // overflow spikes over all ranks
    int ntot = 0;   // spikes over all ranks
    int bstot = 0;  // overflow bytes over all ranks (running displacement)
    for (int i = 0; i < np; ++i) {
        displs[i] = bstot;
        // Decode the two-byte spike count at the start of rank i's segment.
        int idx = i * ag_send_size;
        int n = spikein_fixed[idx++] * 256;
        n += spikein_fixed[idx++];
        ntot += n;
        nin[i] = n;
        if (n > send_nspike) {
            // Total encoded size (2 count bytes + n records) minus what already
            // travelled in the fixed-size part.
            int bs = 2 + n * (1 + localgid_size) - ag_send_size;
            byteovfl[i] = bs;
            bstot += bs;
            novfl += n - send_nspike;
        } else {
            byteovfl[i] = 0;
        }
    }
    if (novfl) {
        if (ovfl_capacity < novfl) {
            // Grow with a little headroom; old contents are not needed.
            ovfl_capacity = novfl + 10;
            free(spfixin_ovfl);
            spfixin_ovfl = (unsigned char*) emalloc(ovfl_capacity * (1 + localgid_size) *
                                                    sizeof(unsigned char));
        }
        int bs = byteovfl[nrnmpi_myid_];
        /*
        note that the spikeout_fixed buffer is one since the overflow
        is contiguous to the first part. But the spfixin_ovfl is
        completely separate from the spikein_fixed since the latter
        dynamically changes its size during a run.
        */
        MPI_Allgatherv(spikeout_fixed + ag_send_size,
                       bs,
                       MPI_BYTE,
                       spfixin_ovfl,
                       byteovfl,
                       displs,
                       MPI_BYTE,
                       nrnmpi_comm);
    }
    ovfl = novfl;
    return ntot;
}
} else { tt = MPI_MIN; } MPI_Allreduce(src, dest, cnt, MPI_LONG, tt, nrnmpi_comm); return; } #if NRN_MULTISEND static MPI_Comm multisend_comm; void nrnmpi_multisend_comm_impl() { if (!multisend_comm) { MPI_Comm_dup(MPI_COMM_WORLD, &multisend_comm); } } void nrnmpi_multisend_impl(NRNMPI_Spike* spk, int n, int* hosts) { MPI_Request r; for (int i = 0; i < n; ++i) { MPI_Isend(spk, 1, spike_type, hosts[i], 1, multisend_comm, &r); MPI_Request_free(&r); } } int nrnmpi_multisend_single_advance_impl(NRNMPI_Spike* spk) { int flag = 0; MPI_Status status; MPI_Iprobe(MPI_ANY_SOURCE, 1, multisend_comm, &flag, &status); if (flag) { MPI_Recv(spk, 1, spike_type, MPI_ANY_SOURCE, 1, multisend_comm, &status); } return flag; } int nrnmpi_multisend_conserve_impl(int nsend, int nrecv) { int tcnts[2]; tcnts[0] = nsend - nrecv; MPI_Allreduce(tcnts, tcnts + 1, 1, MPI_INT, MPI_SUM, multisend_comm); return tcnts[1]; } #endif /*NRN_MULTISEND*/ } // namespace coreneuron ================================================ FILE: coreneuron/mpi/lib/nrnmpi.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include #include #include "coreneuron/nrnconf.h" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/utils/nrn_assert.h" #include "nrnmpi.hpp" #if _OPENMP #include #endif #include namespace coreneuron { MPI_Comm nrnmpi_world_comm; MPI_Comm nrnmpi_comm; int nrnmpi_numprocs_; int nrnmpi_myid_; static bool nrnmpi_under_nrncontrol_{false}; static void nrn_fatal_error(const char* msg) { if (nrnmpi_myid_ == 0) { printf("%s\n", msg); } nrnmpi_abort_impl(-1); } nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) { // Execute at most once per launch. Avoid memory leak. 
static bool executed = false; if (executed) { return {nrnmpi_numprocs_, nrnmpi_myid_}; } nrnmpi_under_nrncontrol_ = true; if (!nrnmpi_initialized_impl()) { #if defined(_OPENMP) int required = MPI_THREAD_FUNNELED; int provided; nrn_assert(MPI_Init_thread(pargc, pargv, required, &provided) == MPI_SUCCESS); nrn_assert(required <= provided); #else nrn_assert(MPI_Init(pargc, pargv) == MPI_SUCCESS); #endif } nrn_assert(MPI_Comm_dup(MPI_COMM_WORLD, &nrnmpi_world_comm) == MPI_SUCCESS); nrn_assert(MPI_Comm_dup(nrnmpi_world_comm, &nrnmpi_comm) == MPI_SUCCESS); nrn_assert(MPI_Comm_rank(nrnmpi_world_comm, &nrnmpi_myid_) == MPI_SUCCESS); nrn_assert(MPI_Comm_size(nrnmpi_world_comm, &nrnmpi_numprocs_) == MPI_SUCCESS); nrnmpi_spike_initialize(); if (nrnmpi_myid_ == 0 && !is_quiet) { #if defined(_OPENMP) printf(" num_mpi=%d\n num_omp_thread=%d\n\n", nrnmpi_numprocs_, omp_get_max_threads()); #else printf(" num_mpi=%d\n\n", nrnmpi_numprocs_); #endif } executed = true; return {nrnmpi_numprocs_, nrnmpi_myid_}; } void nrnmpi_finalize_impl(void) { if (nrnmpi_under_nrncontrol_) { if (nrnmpi_initialized_impl()) { MPI_Comm_free(&nrnmpi_world_comm); MPI_Comm_free(&nrnmpi_comm); MPI_Finalize(); } } } // check if appropriate threading level supported (i.e. MPI_THREAD_FUNNELED) void nrnmpi_check_threading_support_impl() { int th = 0; MPI_Query_thread(&th); if (th < MPI_THREAD_FUNNELED) { nrn_fatal_error( "\n Current MPI library doesn't support MPI_THREAD_FUNNELED,\ \n Run without enabling multi-threading!"); } } bool nrnmpi_initialized_impl() { int flag = 0; MPI_Initialized(&flag); return flag != 0; } void nrnmpi_abort_impl(int errcode) { MPI_Abort(MPI_COMM_WORLD, errcode); } double nrnmpi_wtime_impl() { return MPI_Wtime(); } /** * Return local mpi rank within a shared memory node * * When performing certain operations, we need to know the rank of mpi * process on a given node. This function uses MPI 3 MPI_Comm_split_type * function and MPI_COMM_TYPE_SHARED key to find out the local rank. 
*/ int nrnmpi_local_rank_impl() { int local_rank = 0; if (nrnmpi_initialized_impl()) { MPI_Comm local_comm; MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, nrnmpi_myid_, MPI_INFO_NULL, &local_comm); MPI_Comm_rank(local_comm, &local_rank); MPI_Comm_free(&local_comm); } return local_rank; } /** * Return number of ranks running on single shared memory node * * We use MPI 3 MPI_Comm_split_type function and MPI_COMM_TYPE_SHARED key to * determine number of mpi ranks within a shared memory node. */ int nrnmpi_local_size_impl() { int local_size = 1; if (nrnmpi_initialized_impl()) { MPI_Comm local_comm; MPI_Comm_split_type( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, nrnmpi_myid_, MPI_INFO_NULL, &local_comm); MPI_Comm_size(local_comm, &local_size); MPI_Comm_free(&local_comm); } return local_size; } /** * Write given buffer to a new file using MPI collective I/O * * For output like spikes, each rank has to write spike timing * information to a single file. This routine writes buffers * of length len1, len2, len3... at the offsets 0, 0+len1, * 0+len1+len2... offsets. This write op is a collective across * all ranks of the common MPI communicator used for spike exchange. 
* * @param filename Name of the file to write * @param buffer Buffer to write * @param length Length of the buffer to write */ void nrnmpi_write_file_impl(const std::string& filename, const char* buffer, size_t length) { MPI_File fh; MPI_Status status; // global offset into file unsigned long offset = 0; MPI_Exscan(&length, &offset, 1, MPI_UNSIGNED_LONG, MPI_SUM, nrnmpi_comm); int op_status = MPI_File_open( nrnmpi_comm, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (op_status != MPI_SUCCESS && nrnmpi_myid_ == 0) { std::cerr << "Error while opening output file " << filename << std::endl; abort(); } op_status = MPI_File_write_at_all(fh, offset, buffer, length, MPI_BYTE, &status); if (op_status != MPI_SUCCESS && nrnmpi_myid_ == 0) { std::cerr << "Error while writing output " << std::endl; abort(); } MPI_File_close(&fh); } } // namespace coreneuron ================================================ FILE: coreneuron/mpi/lib/nrnmpi.hpp ================================================ #pragma once // This file contains functions that does not go outside of the mpi library namespace coreneuron { extern int nrnmpi_numprocs_; extern int nrnmpi_myid_; void nrnmpi_spike_initialize(); } // namespace coreneuron ================================================ FILE: coreneuron/mpi/nrnmpi.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
struct mpi_function_base;

/// Keeps track of every MPI entry-point wrapper so that all of them can be
/// bound to their symbols in one pass (see resolve_symbols()).
struct mpi_manager_t {
    /// Remember `ptr`; called by the constructor of mpi_function_base.
    void register_function(mpi_function_base* ptr) {
        m_function_ptrs.push_back(ptr);
    }

    /// Bind all registered wrappers using the given dlsym handle.
    void resolve_symbols(void* dlsym_handle);

  private:
    std::vector<mpi_function_base*> m_function_ptrs;
};

/// Access the process-wide registry (created on first use).
inline mpi_manager_t& mpi_manager() {
    static mpi_manager_t instance;
    return instance;
}

/// Type-independent part of an MPI entry-point wrapper: the symbol name and,
/// once resolved, its address.
struct mpi_function_base {
    /// Store the symbol name and add this wrapper to the registry.
    mpi_function_base(const char* name)
        : m_name{name} {
        mpi_manager().register_function(this);
    }

    /// Bind this wrapper to its symbol using the given dlsym handle.
    void resolve(void* dlsym_handle);

    /// True once an address has been stored in m_fptr.
    operator bool() const {
        return m_fptr != nullptr;
    }

  protected:
    void* m_fptr{};
    const char* m_name;
};
return (*fptr)(std::forward(args)...); #endif } }; } // namespace coreneuron #include "coreneuron/mpi/nrnmpidec.h" ================================================ FILE: coreneuron/mpi/nrnmpidec.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ /* This file is processed by mkdynam.sh and so it is important that the prototypes be of the form "type foo(type arg, ...)" */ #pragma once #include namespace coreneuron { /* from nrnmpi.cpp */ struct nrnmpi_init_ret_t { int numprocs; int myid; }; extern "C" nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet); extern mpi_function nrnmpi_init; extern "C" void nrnmpi_finalize_impl(void); extern mpi_function nrnmpi_finalize; extern "C" void nrnmpi_check_threading_support_impl(); extern mpi_function nrnmpi_check_threading_support; // Write given buffer to a new file using MPI collective I/O extern "C" void nrnmpi_write_file_impl(const std::string& filename, const char* buffer, size_t length); extern mpi_function nrnmpi_write_file; /* from mpispike.cpp */ extern "C" int nrnmpi_spike_exchange_impl(int* nin, NRNMPI_Spike* spikeout, int icapacity, NRNMPI_Spike** spikein, int& ovfl, int nout, NRNMPI_Spikebuf* spbufout, NRNMPI_Spikebuf* spbufin); extern mpi_function nrnmpi_spike_exchange; extern "C" int nrnmpi_spike_exchange_compressed_impl(int, unsigned char*&, int, int*, int, unsigned char*, int, unsigned char*, int& ovfl); extern mpi_function nrnmpi_spike_exchange_compressed; extern "C" int nrnmpi_int_allmax_impl(int i); extern mpi_function nrnmpi_int_allmax; extern "C" void nrnmpi_int_allgather_impl(int* s, int* r, int n); extern mpi_function nrnmpi_int_allgather; extern "C" void nrnmpi_int_alltoall_impl(int* s, int* r, int n); extern mpi_function 
nrnmpi_int_alltoall; extern "C" void nrnmpi_int_alltoallv_impl(const int* s, const int* scnt, const int* sdispl, int* r, int* rcnt, int* rdispl); extern mpi_function nrnmpi_int_alltoallv; extern "C" void nrnmpi_dbl_alltoallv_impl(double* s, int* scnt, int* sdispl, double* r, int* rcnt, int* rdispl); extern mpi_function nrnmpi_dbl_alltoallv; extern "C" double nrnmpi_dbl_allmin_impl(double x); extern mpi_function nrnmpi_dbl_allmin; extern "C" double nrnmpi_dbl_allmax_impl(double x); extern mpi_function nrnmpi_dbl_allmax; extern "C" void nrnmpi_barrier_impl(void); extern mpi_function nrnmpi_barrier; extern "C" double nrnmpi_dbl_allreduce_impl(double x, int type); extern mpi_function nrnmpi_dbl_allreduce; extern "C" void nrnmpi_dbl_allreduce_vec_impl(double* src, double* dest, int cnt, int type); extern mpi_function nrnmpi_dbl_allreduce_vec; extern "C" void nrnmpi_long_allreduce_vec_impl(long* src, long* dest, int cnt, int type); extern mpi_function nrnmpi_long_allreduce_vec; extern "C" bool nrnmpi_initialized_impl(); extern mpi_function nrnmpi_initialized; extern "C" void nrnmpi_abort_impl(int); extern mpi_function nrnmpi_abort; extern "C" double nrnmpi_wtime_impl(); extern mpi_function nrnmpi_wtime; extern "C" int nrnmpi_local_rank_impl(); extern mpi_function nrnmpi_local_rank; extern "C" int nrnmpi_local_size_impl(); extern mpi_function nrnmpi_local_size; #if NRN_MULTISEND extern "C" void nrnmpi_multisend_comm_impl(); extern mpi_function nrnmpi_multisend_comm; extern "C" void nrnmpi_multisend_impl(NRNMPI_Spike* spk, int n, int* hosts); extern mpi_function nrnmpi_multisend; extern "C" int nrnmpi_multisend_single_advance_impl(NRNMPI_Spike* spk); extern mpi_function nrnmpi_multisend_single_advance; extern "C" int nrnmpi_multisend_conserve_impl(int nsend, int nrecv); extern mpi_function nrnmpi_multisend_conserve; #endif } // namespace coreneuron ================================================ FILE: coreneuron/mpi/nrnmpiuse.h 
================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once /* define to 1 if you want MPI specific features activated (optionally provided by CMake option NRNMPI) */ #ifndef NRNMPI #define NRNMPI 1 #endif /* define to 1 if want multisend spike exchange available */ #ifndef NRN_MULTISEND #define NRN_MULTISEND 1 #endif /* define to 1 if you want parallel distributed cells (and gap junctions) */ #define PARANEURON 1 /* define to 1 if you want the MUSIC - MUlti SImulation Coordinator */ #undef NRN_MUSIC /* define to the dll path if you want to load automatically */ #undef DLL_DEFAULT_FNAME /* Number of times to retry a failed open */ #undef FILE_OPEN_RETRY /* Define to 1 for possibility of rank 0 xopen/ropen a file and broadcast everywhere */ #undef USE_NRNFILEWRAP ================================================ FILE: coreneuron/network/cvodestb.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/coreneuron.hpp" #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" // solver CVode stub to allow cvode as dll for mswindows version. 
#include "coreneuron/network/netcvode.hpp" #include "coreneuron/utils/vrecitem.h" #include "coreneuron/gpu/nrn_acc_manager.hpp" namespace coreneuron { // for fixed step thread // check thresholds and deliver all (including binqueue) events // up to t+dt/2 void deliver_net_events(NrnThread* nt) { if (net_cvode_instance) { net_cvode_instance->check_thresh(nt); net_cvode_instance->deliver_net_events(nt); } } // deliver events (but not binqueue) up to nt->_t void nrn_deliver_events(NrnThread* nt) { double tsav = nt->_t; if (net_cvode_instance) { net_cvode_instance->deliver_events(tsav, nt); } nt->_t = tsav; /*before executing on gpu, we have to update the NetReceiveBuffer_t on GPU */ update_net_receive_buffer(nt); for (auto& net_buf_receive: corenrn.get_net_buf_receive()) { (*net_buf_receive.first)(nt); } } void clear_event_queue() { if (net_cvode_instance) { net_cvode_instance->clear_events(); } } void init_net_events() { if (net_cvode_instance) { net_cvode_instance->init_events(); } #ifdef CORENEURON_ENABLE_GPU /* weight vectors could be updated (from INITIAL block of NET_RECEIVE, update those on GPU's */ for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread* nt = nrn_threads + ith; double* weights = nt->weights; int n_weight = nt->n_weight; if (n_weight && nt->compute_gpu) { nrn_pragma_acc(update device(weights [0:n_weight])) nrn_pragma_omp(target update to(weights [0:n_weight])) } } #endif } void nrn_play_init() { for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread* nt = nrn_threads + ith; for (int i = 0; i < nt->n_vecplay; ++i) { ((PlayRecord*) nt->_vecplay[i])->play_init(); } } } void fixed_play_continuous(NrnThread* nt) { for (int i = 0; i < nt->n_vecplay; ++i) { ((PlayRecord*) nt->_vecplay[i])->continuous(nt->_t); } } } // namespace coreneuron ================================================ FILE: coreneuron/network/have2want.h ================================================ /* # 
============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ /* To be included by a file that desires rendezvous rank exchange functionality. Need to define HAVEWANT_t, HAVEWANT_alltoallv, and HAVEWANT2Int */ #ifdef have2want_h #error "This implementation can only be included once" /* The static function names could involve a macro name. */ #endif #define have2want_h #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/mpi/core/nrnmpi.hpp" /* A rank owns a set of HAVEWANT_t keys and wants information associated with a set of HAVEWANT_t keys owned by unknown ranks. Owners do not know which ranks want their information. Ranks that want info do not know which ranks own that info. The have_to_want function returns two new vectors of keys along with associated count and displacement vectors of length nrnmpi_numprocs and nrnmpi_numprocs+1 respectively. Note that send_to_want_displ[i+1] = send_to_want_cnt[i] + send_to_want_displ[i]. send_to_want[send_to_want_displ[i] to send_to_want_displ[i+1]] contains the keys from this rank for which rank i wants information. recv_from_have[recv_from_have_displ[i] to recv_from_have_displ[i+1]] contains the keys for which rank i is sending information to this rank. Note that on rank i, the order of keys in the rank j area of send_to_want is the same as the order of keys on rank j in the ith area of recv_from_have. The rendezvous_rank function is used to parallelize this computation and minimize memory usage so that no single rank ever needs to know all keys.
*/

#ifndef HAVEWANT_t
#define HAVEWANT_t int
#endif

namespace coreneuron {
// round robin default rendezvous rank function
// Returns key % nrnmpi_numprocs.
// NOTE(review): for a negative key the remainder is negative and would not be
// a valid rank index -- presumably keys are non-negative; confirm with callers.
static int default_rendezvous(HAVEWANT_t key) {
    return key % nrnmpi_numprocs;
}

// Build a displacement array from a per-rank count array.
// Returns a new[]-allocated array of nrnmpi_numprocs + 1 ints where
// displ[0] == 0 and displ[i + 1] == displ[i] + cnt[i]; the last entry is
// therefore the total count. The array is not freed here; every caller in
// this file releases it with delete[] or hands it on to its own caller.
static int* cnt2displ(int* cnt) {
    int* displ = new int[nrnmpi_numprocs + 1];
    displ[0] = 0;
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        displ[i + 1] = displ[i] + cnt[i];
    }
    return displ;
}

// Turn "how many items I send to each rank" into "how many items I receive
// from each rank". Returns a new[]-allocated array of nrnmpi_numprocs ints.
// With NRNMPI compiled in and corenrn_param.mpi_enable set, the counts are
// exchanged with nrnmpi_int_alltoall (one int per rank); otherwise the send
// counts are simply copied.
static int* srccnt2destcnt(int* srccnt) {
    int* destcnt = new int[nrnmpi_numprocs];
#if NRNMPI
    if (corenrn_param.mpi_enable) {
        nrnmpi_int_alltoall(srccnt, destcnt, 1);
    } else
#endif
    {
        for (int i = 0; i < nrnmpi_numprocs; ++i) {
            destcnt[i] = srccnt[i];
        }
    }
    return destcnt;
}

// Send each of the `size` keys in `data` to the rank chosen by
// rendezvous_rank(key) and receive the keys other ranks sent here.
//
// All six reference parameters are outputs and are allocated with new[] in
// this function (nothing is freed here):
//   sdata/scnt/sdispl : keys grouped by destination rank, with per-rank
//                       counts and displacements (sdispl has numprocs + 1
//                       entries, see cnt2displ).
//   rdata/rcnt/rdispl : the same layout for the keys received, grouped by
//                       the rank they came from.
// NOTE(review): HAVEWANT_alltoallv is not defined in the visible part of this
// file; presumably the including translation unit supplies it -- confirm.
static void rendezvous_rank_get(HAVEWANT_t* data,
                                int size,
                                HAVEWANT_t*& sdata,
                                int*& scnt,
                                int*& sdispl,
                                HAVEWANT_t*& rdata,
                                int*& rcnt,
                                int*& rdispl,
                                int (*rendezvous_rank)(HAVEWANT_t)) {
    // count what gets sent
    scnt = new int[nrnmpi_numprocs];
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        scnt[i] = 0;
    }
    for (int i = 0; i < size; ++i) {
        int r = (*rendezvous_rank)(data[i]);
        ++scnt[r];
    }

    sdispl = cnt2displ(scnt);
    rcnt = srccnt2destcnt(scnt);
    rdispl = cnt2displ(rcnt);
    sdata = new HAVEWANT_t[sdispl[nrnmpi_numprocs]];
    rdata = new HAVEWANT_t[rdispl[nrnmpi_numprocs]];
    // scatter data into sdata by recalculating scnt.
    // (scnt[r] doubles as the fill cursor inside rank r's slot and ends up
    // with the same values as after the counting pass above.)
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        scnt[i] = 0;
    }
    for (int i = 0; i < size; ++i) {
        int r = (*rendezvous_rank)(data[i]);
        sdata[sdispl[r] + scnt[r]] = data[i];
        ++scnt[r];
    }
#if NRNMPI
    if (corenrn_param.mpi_enable) {
        HAVEWANT_alltoallv(sdata, scnt, sdispl, rdata, rcnt, rdispl);
    } else
#endif
    {
        // No MPI exchange: what is "sent" is exactly what is "received".
        for (int i = 0; i < sdispl[nrnmpi_numprocs]; ++i) {
            rdata[i] = sdata[i];
        }
    }
}

// Match the keys this rank owns (`have`) with the keys this rank needs
// (`want`) across all ranks, using rendezvous_rank(key) as the meeting point.
//
// Outputs (all new[]-allocated here and not freed here, so ownership passes
// to the caller):
//   send_to_want / _cnt / _displ     : keys owned by this rank, grouped by
//                                      the rank that wants them.
//   recv_from_have / _cnt / _displ   : keys wanted by this rank, grouped by
//                                      the rank that owns them.
// hoc_execerror is called if a key is owned by more than one rank or if a
// wanted key is owned by no rank.
// NOTE(review): HAVEWANT2Int is not defined in the visible part of this file;
// it is used as a map from key to int (find / end / operator[]) -- confirm
// the concrete type in the including translation unit.
static void have_to_want(HAVEWANT_t* have,
                         int have_size,
                         HAVEWANT_t* want,
                         int want_size,
                         HAVEWANT_t*& send_to_want,
                         int*& send_to_want_cnt,
                         int*& send_to_want_displ,
                         HAVEWANT_t*& recv_from_have,
                         int*& recv_from_have_cnt,
                         int*& recv_from_have_displ,
                         int (*rendezvous_rank)(HAVEWANT_t)) {
    // 1) Send have and want to the rendezvous ranks.
    // 2) Rendezvous rank matches have and want.
    // 3) Rendezvous ranks tell the want ranks which ranks own the keys
    // 4) Ranks that want tell owner ranks where to send.

    // 1) Send have and want to the rendezvous ranks.
    HAVEWANT_t *have_s_data, *have_r_data;
    int *have_s_cnt, *have_s_displ, *have_r_cnt, *have_r_displ;
    rendezvous_rank_get(have,
                        have_size,
                        have_s_data,
                        have_s_cnt,
                        have_s_displ,
                        have_r_data,
                        have_r_cnt,
                        have_r_displ,
                        rendezvous_rank);
    // assume it is an error if two ranks have the same key so create
    // hash table of key2rank. Will also need it for matching have and want
    HAVEWANT2Int havekey2rank = HAVEWANT2Int();
    for (int r = 0; r < nrnmpi_numprocs; ++r) {
        for (int i = 0; i < have_r_cnt[r]; ++i) {
            HAVEWANT_t key = have_r_data[have_r_displ[r] + i];
            if (havekey2rank.find(key) != havekey2rank.end()) {
                char buf[200];
                sprintf(buf, "key %lld owned by multiple ranks\n", (long long) key);
                hoc_execerror(buf, 0);
            }
            havekey2rank[key] = r;
        }
    }
    // The "have" exchange buffers are only needed to build havekey2rank.
    delete[] have_s_data;
    delete[] have_s_cnt;
    delete[] have_s_displ;
    delete[] have_r_data;
    delete[] have_r_cnt;
    delete[] have_r_displ;

    HAVEWANT_t *want_s_data, *want_r_data;
    int *want_s_cnt, *want_s_displ, *want_r_cnt, *want_r_displ;
    rendezvous_rank_get(want,
                        want_size,
                        want_s_data,
                        want_s_cnt,
                        want_s_displ,
                        want_r_data,
                        want_r_cnt,
                        want_r_displ,
                        rendezvous_rank);

    // 2) Rendezvous rank matches have and want.
    //    we already have made the havekey2rank map.
    // Create an array parallel to want_r_data which contains the ranks that
    // have that data.
    int n = want_r_displ[nrnmpi_numprocs];
    int* want_r_ownerranks = new int[n];
    for (int r = 0; r < nrnmpi_numprocs; ++r) {
        for (int i = 0; i < want_r_cnt[r]; ++i) {
            int ix = want_r_displ[r] + i;
            HAVEWANT_t key = want_r_data[ix];
            if (havekey2rank.find(key) == havekey2rank.end()) {
                char buf[200];
                sprintf(buf, "key = %lld is wanted but does not exist\n", (long long) key);
                hoc_execerror(buf, 0);
            }
            want_r_ownerranks[ix] = havekey2rank[key];
        }
    }
    delete[] want_r_data;

    // 3) Rendezvous ranks tell the want ranks which ranks own the keys
    // The ranks that want keys need to know the ranks that own those keys.
    // The want_s_ownerranks will be parallel to the want_s_data.
    // That is, each item defines the rank from which information associated
    // with that key is coming from
    int* want_s_ownerranks = new int[want_s_displ[nrnmpi_numprocs]];
#if NRNMPI
    if (corenrn_param.mpi_enable) {
        // Reverse direction of the step-1 exchange: the "received" layout
        // (want_r_*) is now the send side and want_s_* the receive side.
        nrnmpi_int_alltoallv(want_r_ownerranks,
                             want_r_cnt,
                             want_r_displ,
                             want_s_ownerranks,
                             want_s_cnt,
                             want_s_displ);
    } else
#endif
    {
        for (int i = 0; i < want_r_displ[nrnmpi_numprocs]; ++i) {
            want_s_ownerranks[i] = want_r_ownerranks[i];
        }
    }
    delete[] want_r_ownerranks;
    delete[] want_r_cnt;
    delete[] want_r_displ;

    // 4) Ranks that want tell owner ranks where to send.
    // Finished with the rendezvous ranks. The ranks that want keys know the
    // owner ranks for those keys. The next step is for the want ranks to
    // tell the owner ranks where to send.
    // The parallel want_s_ownerranks and want_s_data are now uselessly ordered
    // by rendezvous rank. Reorganize so that want ranks can tell owner ranks
    // what they want.
    n = want_s_displ[nrnmpi_numprocs];
    delete[] want_s_displ;
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        want_s_cnt[i] = 0;
    }
    HAVEWANT_t* old_want_s_data = want_s_data;
    want_s_data = new HAVEWANT_t[n];
    // compute the counts
    for (int i = 0; i < n; ++i) {
        int r = want_s_ownerranks[i];
        ++want_s_cnt[r];
    }
    want_s_displ = cnt2displ(want_s_cnt);
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        want_s_cnt[i] = 0;
    }  // recount while filling
    for (int i = 0; i < n; ++i) {
        int r = want_s_ownerranks[i];
        HAVEWANT_t key = old_want_s_data[i];
        want_s_data[want_s_displ[r] + want_s_cnt[r]] = key;
        ++want_s_cnt[r];
    }
    delete[] want_s_ownerranks;
    delete[] old_want_s_data;
    want_r_cnt = srccnt2destcnt(want_s_cnt);
    want_r_displ = cnt2displ(want_r_cnt);
    want_r_data = new HAVEWANT_t[want_r_displ[nrnmpi_numprocs]];
#if NRNMPI
    if (corenrn_param.mpi_enable) {
        HAVEWANT_alltoallv(
            want_s_data, want_s_cnt, want_s_displ, want_r_data, want_r_cnt, want_r_displ);
    } else
#endif
    {
        for (int i = 0; i < want_s_displ[nrnmpi_numprocs]; ++i) {
            want_r_data[i] = want_s_data[i];
        }
    }
    // now the want_r_data on the have_ranks are grouped according to the ranks
    // that want those keys.

    // Hand the six remaining buffers to the caller; none is freed here.
    send_to_want = want_r_data;
    send_to_want_cnt = want_r_cnt;
    send_to_want_displ = want_r_displ;
    recv_from_have = want_s_data;
    recv_from_have_cnt = want_s_cnt;
    recv_from_have_displ = want_s_displ;
}

}  // namespace coreneuron

================================================
FILE: coreneuron/network/multisend.cpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================
*/

#include "coreneuron/nrniv/nrniv_decl.h"
#include "coreneuron/network/multisend.hpp"
#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/network/netcon.hpp"
#include "coreneuron/network/netcvode.hpp"

/*
Overall exchange strategy

When a cell spikes, it immediately does a multisend of
(int gid, double spiketime) to all the target machines that have
cells that need to receive this spike by spiketime + delay.
The MPI implementation does not block due to use of MPI_Isend.

In order to minimize the number of nrnmpi_multisend_conserve tests
(and potentially abandon them altogether if I can ever guarantee
that exchange time is less than half the computation time), I divide the
minimum delay integration intervals into two equal subintervals.
So if a spike is generated in an even subinterval, I do not have
to include it in the conservation check until the end of the next even
subinterval.

When a spike is received (generally MPI_Iprobe, MPI_Recv) it is placed in
even or odd buffers (depending on whether the coded gid is positive or negative)

At the end of a computation subinterval the even or odd buffer spikes
are enqueued in the priority queue after checking that the number
of spikes sent is equal to the number of spikes received.
*/

// The initial idea behind the optional phase2 is to avoid the large overhead of
// initiating a send of the up to 10k list of target hosts when a cell fires.
// I.e. when there are a small number of cells on a processor, this causes
// load balance problems.
// Load balance should be better if the send is distributed to a much smaller
// set of targets, which, when they receive the spike, pass it on to a neighbor
// set. A non-exclusive alternative to this is the use of RECORD_REPLAY
// which gives a very fast initiation but we have not been able to get that
// to complete in the sense of all the targets receiving their spikes before
// the conservation step.
// We expect that phase2 will work best in combination with ENQUEUE=2 // which has the greatest amount of overlap between computation // and communication. namespace coreneuron { bool use_multisend_; bool use_phase2_; int n_multisend_interval = 2; #if NRN_MULTISEND static int n_xtra_cons_check_; #define MAXNCONS 10 #if MAXNCONS static int xtra_cons_hist_[MAXNCONS + 1]; #endif // ENQUEUE 0 means to Multisend_ReceiveBuffer buffer -> InputPreSyn.send // ENQUEUE 1 means to Multisend_ReceiveBuffer buffer -> psbuf -> InputPreSyn.send // ENQUEUE 2 means to Multisend_ReceiveBuffer.incoming -> InputPrySyn.send // Note that ENQUEUE 2 give more overlap between computation and exchange // since the enqueuing takes place during computation except for those // remaining during conservation. #define ENQUEUE 2 #if ENQUEUE == 2 static unsigned long enq2_find_time_; static unsigned long enq2_enqueue_time_; // includes enq_find_time_ #endif #define PHASE2BUFFER_SIZE 2048 // power of 2 #define PHASE2BUFFER_MASK (PHASE2BUFFER_SIZE - 1) struct Phase2Buffer { InputPreSyn* ps; double spiketime; int gid; }; #define MULTISEND_RECEIVEBUFFER_SIZE 10000 class Multisend_ReceiveBuffer { public: Multisend_ReceiveBuffer(); virtual ~Multisend_ReceiveBuffer(); void init(int index); void incoming(int gid, double spiketime); void enqueue(); int index_{}; int size_{MULTISEND_RECEIVEBUFFER_SIZE}; int count_{}; int maxcount_{}; bool busy_{}; int nsend_{}, nrecv_{}; // for checking conservation int nsend_cell_{}; // cells that spiked this interval. 
NRNMPI_Spike** buffer_{}; void enqueue1(); void enqueue2(); InputPreSyn** psbuf_{}; void phase2send(); int phase2_head_{}; int phase2_tail_{}; int phase2_nsend_cell_{}, phase2_nsend_{}; Phase2Buffer* phase2_buffer_{}; }; #define MULTISEND_INTERVAL 2 static Multisend_ReceiveBuffer* multisend_receive_buffer[MULTISEND_INTERVAL]; static int current_rbuf, next_rbuf; #if MULTISEND_INTERVAL == 2 // note that if a spike is supposed to be received by multisend_receive_buffer[1] // then during transmission its gid is complemented. #endif static int* targets_phase1_; static int* targets_phase2_; void nrn_multisend_send(PreSyn* ps, double t, NrnThread* nt) { int i = ps->multisend_index_; if (i >= 0) { // format is cnt, cnt_phase1, array of target ranks. // Valid for one or two phase. int* ranks = targets_phase1_ + i; int cnt = ranks[0]; int cnt_phase1 = ranks[1]; ranks += 2; NRNMPI_Spike spk; spk.gid = ps->output_index_; spk.spiketime = t; if (next_rbuf == 1) { spk.gid = ~spk.gid; } if (nt == nrn_threads) { multisend_receive_buffer[next_rbuf]->nsend_ += cnt; multisend_receive_buffer[next_rbuf]->nsend_cell_ += 1; nrnmpi_multisend(&spk, cnt_phase1, ranks); } else { assert(0); } } } static void multisend_send_phase2(InputPreSyn* ps, int gid, double t) { int i = ps->multisend_phase2_index_; assert(i >= 0); // format is cnt_phase2, array of target ranks int* ranks = targets_phase2_ + i; int cnt_phase2 = ranks[0]; ranks += 1; NRNMPI_Spike spk; spk.gid = gid; spk.spiketime = t; nrnmpi_multisend(&spk, cnt_phase2, ranks); } Multisend_ReceiveBuffer::Multisend_ReceiveBuffer() : buffer_ { new NRNMPI_Spike*[size_] } #if ENQUEUE == 1 , psbuf_ { new InputPreSyn*[size_] } #endif , phase2_buffer_{new Phase2Buffer[PHASE2BUFFER_SIZE]} {} Multisend_ReceiveBuffer::~Multisend_ReceiveBuffer() { nrn_assert(!busy_); for (int i = 0; i < count_; ++i) { delete buffer_[i]; } delete[] buffer_; if (psbuf_) delete[] psbuf_; delete[] phase2_buffer_; } void Multisend_ReceiveBuffer::init(int index) { index_ = 
index; nsend_cell_ = nsend_ = nrecv_ = maxcount_ = 0; busy_ = false; for (int i = 0; i < count_; ++i) { delete buffer_[i]; } count_ = 0; phase2_head_ = phase2_tail_ = 0; phase2_nsend_cell_ = phase2_nsend_ = 0; } void Multisend_ReceiveBuffer::incoming(int gid, double spiketime) { // printf("%d %p.incoming %g %g %d\n", nrnmpi_myid, this, t, spk->spiketime, spk->gid); nrn_assert(!busy_); busy_ = true; if (count_ >= size_) { size_ *= 2; NRNMPI_Spike** newbuf = new NRNMPI_Spike*[size_]; for (int i = 0; i < count_; ++i) { newbuf[i] = buffer_[i]; } delete[] buffer_; buffer_ = newbuf; if (psbuf_) { delete[] psbuf_; psbuf_ = new InputPreSyn*[size_]; } } NRNMPI_Spike* spk = new NRNMPI_Spike(); spk->gid = gid; spk->spiketime = spiketime; buffer_[count_++] = spk; if (maxcount_ < count_) { maxcount_ = count_; } ++nrecv_; busy_ = false; } void Multisend_ReceiveBuffer::enqueue() { // printf("%d %p.enqueue count=%d t=%g nrecv=%d nsend=%d\n", nrnmpi_myid, this, t, count_, // nrecv_, nsend_); nrn_assert(!busy_); busy_ = true; for (int i = 0; i < count_; ++i) { NRNMPI_Spike* spk = buffer_[i]; auto gid2in_it = gid2in.find(spk->gid); assert(gid2in_it != gid2in.end()); InputPreSyn* ps = gid2in_it->second; if (use_phase2_ && ps->multisend_phase2_index_ >= 0) { Phase2Buffer& pb = phase2_buffer_[phase2_head_++]; phase2_head_ &= PHASE2BUFFER_MASK; assert(phase2_head_ != phase2_tail_); pb.ps = ps; pb.spiketime = spk->spiketime; pb.gid = spk->gid; } ps->send(spk->spiketime, net_cvode_instance, nrn_threads); delete spk; } count_ = 0; #if ENQUEUE != 2 nrecv_ = 0; nsend_ = 0; nsend_cell_ = 0; #endif busy_ = false; phase2send(); } void Multisend_ReceiveBuffer::enqueue1() { // printf("%d %lx.enqueue count=%d t=%g nrecv=%d nsend=%d\n", nrnmpi_myid, (long)this, t, // count_, nrecv_, nsend_); nrn_assert(!busy_); busy_ = true; for (int i = 0; i < count_; ++i) { NRNMPI_Spike* spk = buffer_[i]; auto gid2in_it = gid2in.find(spk->gid); assert(gid2in_it != gid2in.end()); InputPreSyn* ps = 
gid2in_it->second; psbuf_[i] = ps; if (use_phase2_ && ps->multisend_phase2_index_ >= 0) { Phase2Buffer& pb = phase2_buffer_[phase2_head_++]; phase2_head_ &= PHASE2BUFFER_MASK; assert(phase2_head_ != phase2_tail_); pb.ps = ps; pb.spiketime = spk->spiketime; pb.gid = spk->gid; } } busy_ = false; phase2send(); } void Multisend_ReceiveBuffer::enqueue2() { // printf("%d %lx.enqueue count=%d t=%g nrecv=%d nsend=%d\n", nrnmpi_myid, (long)this, t, // count_, nrecv_, nsend_); nrn_assert(!busy_); busy_ = false; for (int i = 0; i < count_; ++i) { NRNMPI_Spike* spk = buffer_[i]; InputPreSyn* ps = psbuf_[i]; ps->send(spk->spiketime, net_cvode_instance, nrn_threads); delete spk; } count_ = 0; nrecv_ = 0; nsend_ = 0; nsend_cell_ = 0; busy_ = false; } void Multisend_ReceiveBuffer::phase2send() { while (phase2_head_ != phase2_tail_) { Phase2Buffer& pb = phase2_buffer_[phase2_tail_++]; phase2_tail_ &= PHASE2BUFFER_MASK; int gid = pb.gid; if (index_) { gid = ~gid; } multisend_send_phase2(pb.ps, gid, pb.spiketime); } } static int max_ntarget_host; // For one phase sending, max_multisend_targets is max_ntarget_host. // For two phase sending, it is the maximum of all the // ntarget_hosts_phase1 and ntarget_hosts_phase2. 
static int max_multisend_targets; void nrn_multisend_init() { for (int i = 0; i < n_multisend_interval; ++i) { multisend_receive_buffer[i]->init(i); } current_rbuf = 0; next_rbuf = n_multisend_interval - 1; #if ENQUEUE == 2 enq2_find_time_ = enq2_enqueue_time_ = 0; #endif n_xtra_cons_check_ = 0; #if MAXNCONS for (int i = 0; i <= MAXNCONS; ++i) { xtra_cons_hist_[i] = 0; } #endif // MAXNCONS } static int multisend_advance() { NRNMPI_Spike spk; int i = 0; while (nrnmpi_multisend_single_advance(&spk)) { i += 1; int j = 0; #if MULTISEND_INTERVAL == 2 if (spk.gid < 0) { spk.gid = ~spk.gid; j = 1; } #endif multisend_receive_buffer[j]->incoming(spk.gid, spk.spiketime); } return i; } #if NRN_MULTISEND void nrn_multisend_advance() { if (use_multisend_) { multisend_advance(); #if ENQUEUE == 2 multisend_receive_buffer[current_rbuf]->enqueue(); #endif } } #endif void nrn_multisend_receive(NrnThread* nt) { // nrn_spike_exchange(); assert(nt == nrn_threads); // double w1, w2; int ncons = 0; int& s = multisend_receive_buffer[current_rbuf]->nsend_; int& r = multisend_receive_buffer[current_rbuf]->nrecv_; // w1 = nrn_wtime(); #if NRN_MULTISEND & 1 if (use_multisend_) { nrn_multisend_advance(); nrnmpi_barrier(); nrn_multisend_advance(); // with two phase we expect conservation to hold and ncons should // be 0. 
while (nrnmpi_multisend_conserve(s, r) != 0) { nrn_multisend_advance(); ++ncons; } } #endif // w1 = nrn_wtime() - w1; // w2 = nrn_wtime(); #if ENQUEUE == 0 multisend_receive_buffer[current_rbuf]->enqueue(); #endif #if ENQUEUE == 1 multisend_receive_buffer[current_rbuf]->enqueue1(); multisend_receive_buffer[current_rbuf]->enqueue2(); #endif #if ENQUEUE == 2 multisend_receive_buffer[current_rbuf]->enqueue(); s = r = multisend_receive_buffer[current_rbuf]->nsend_cell_ = 0; multisend_receive_buffer[current_rbuf]->phase2_nsend_cell_ = 0; multisend_receive_buffer[current_rbuf]->phase2_nsend_ = 0; enq2_find_time_ = 0; enq2_enqueue_time_ = 0; #endif // ENQUEUE == 2 // wt1_ = nrn_wtime() - w2; // wt_ = w1; #if MULTISEND_INTERVAL == 2 // printf("%d reverse buffers %g\n", nrnmpi_myid, t); if (n_multisend_interval == 2) { current_rbuf = next_rbuf; next_rbuf = ((next_rbuf + 1) & 1); } #endif } void nrn_multisend_cleanup() { if (targets_phase1_) { delete[] targets_phase1_; targets_phase1_ = nullptr; } if (targets_phase2_) { delete[] targets_phase2_; targets_phase2_ = nullptr; } // cleanup MultisendReceiveBuffer here as well } void nrn_multisend_setup() { nrn_multisend_cleanup(); if (!use_multisend_) { return; } nrnmpi_multisend_comm(); // if (nrnmpi_myid == 0) printf("multisend_setup()\n"); // although we only care about the set of hosts that gid2out_ // sends spikes to (source centric). We do not want to send // the entire list of gid2in (which may be 10000 times larger // than gid2out) from every machine to every machine. // so we accomplish the task in two phases the first of which // involves allgather with a total receive buffer size of number // of cells (even that is too large and we will split it up // into chunks). And the second, an // allreduce with receive buffer size of number of hosts. max_ntarget_host = 0; max_multisend_targets = 0; // completely new algorithm does one and two phase. 
nrn_multisend_setup_targets(use_phase2_, targets_phase1_, targets_phase2_); if (!multisend_receive_buffer[0]) { multisend_receive_buffer[0] = new Multisend_ReceiveBuffer(); } #if MULTISEND_INTERVAL == 2 if (n_multisend_interval == 2 && !multisend_receive_buffer[1]) { multisend_receive_buffer[1] = new Multisend_ReceiveBuffer(); } #endif } #endif // NRN_MULTISEND } // namespace coreneuron ================================================ FILE: coreneuron/network/multisend.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/mpi/nrnmpiuse.h" namespace coreneuron { extern bool use_multisend_; extern int n_multisend_interval; extern bool use_phase2_; class PreSyn; struct NrnThread; void nrn_multisend_send(PreSyn*, double t, NrnThread*); void nrn_multisend_receive(NrnThread*); // must be thread 0 void nrn_multisend_advance(); void nrn_multisend_init(); void nrn_multisend_cleanup(); void nrn_multisend_setup(); void nrn_multisend_setup_targets(bool use_phase2, int*& targets_phase1, int*& targets_phase2); } // namespace coreneuron ================================================ FILE: coreneuron/network/multisend_setup.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# =============================================================================
*/

// NOTE(review): in this copy of the file the angle-bracketed text appears to
// have been stripped (bare `#include` lines, `std::vector` / `std::map`
// without arguments, `template` without a parameter list). The tokens are
// left exactly as found; restore them from the upstream file before compiling.
#include
#include
#include

#if CORENRN_DEBUG
#include
#include
#endif

#include "coreneuron/utils/randoms/nrnran123.h"
#include "coreneuron/nrniv/nrniv_decl.h"
#include "coreneuron/network/multisend.hpp"
#include "coreneuron/mpi/nrnmpidec.h"
#include "coreneuron/mpi/core/nrnmpi.hpp"
#include "coreneuron/utils/memory_utils.h"
#include "coreneuron/utils/utils.hpp"
/*
For very large numbers of processors and cells and fanout, it is taking
a long time to figure out each cells target list given the input gids
(gid2in) on each host. e.g 240 seconds for 2^25 cells, 1k connections
per cell, and 128K cores; and 340 seconds for two phase exchange.
To reduce this setup time we experiment with a very different algorithm in which
we construct a gid target host list on host gid%nhost and copy that list to
the source host owning the gid.
*/

#if NRN_MULTISEND
namespace coreneuron {
using Gid2IPS = std::map;
using Gid2PS = std::map;

#if CORENRN_DEBUG
// Debug aid: append the label `p`, this rank's id and every key of `map` to
// the file "debug.<rank>".
template
static void celldebug(const char* p, T& map) {
    std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
    std::ofstream f(fname, std::ios::app);
    f << std::endl << p << std::endl;
    int rank = nrnmpi_myid;
    f << " " << std::setw(2) << std::setfill('0') << rank << ":";
    for (const auto& m: map) {
        int gid = m.first;
        f << " " << std::setw(2) << std::setfill('0') << gid << ":";
    }
    f << std::endl;
}

// Debug aid: append the send (s, scnt, sdispl) and receive (r, rcnt, rdispl)
// sides of an alltoallv exchange, one line per rank, to "debug.<rank>".
// Reads sdispl[i + 1] / rdispl[i + 1], so both displacement vectors need
// nrnmpi_numprocs + 1 entries.
static void alltoalldebug(const char* p,
                          const std::vector& s,
                          const std::vector& scnt,
                          const std::vector& sdispl,
                          const std::vector& r,
                          const std::vector& rcnt,
                          const std::vector& rdispl) {
    std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
    std::ofstream f(fname, std::ios::app);
    f << std::endl << p << std::endl;
    int rank = nrnmpi_myid;
    f << " rank " << rank << std::endl;
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        f << " s" << i << " : " << scnt[i] << " " << sdispl[i] << " :";
        for (int j = sdispl[i]; j < sdispl[i + 1]; ++j) {
            f << " " << std::setw(2) << std::setfill('0') << s[j] << ":";
        }
        f << std::endl;
    }
    for (int i = 0; i < nrnmpi_numprocs; ++i) {
        f << " r" << i << " : " << rcnt[i] << " " << rdispl[i] << " :";
        for (int j = rdispl[i]; j < rdispl[i + 1]; ++j) {
            f << " " << std::setw(2) << std::setfill('0') << r[j] << ":";
        }
        f << std::endl;
    }
}
#else
// Non-debug builds: both helpers are empty so call sites need no #if.
template
static void celldebug(const char*, T&) {}
static void alltoalldebug(const char*,
                          const std::vector&,
                          const std::vector&,
                          const std::vector&,
                          const std::vector&,
                          const std::vector&,
                          const std::vector&) {}
#endif

#if CORENRN_DEBUG
// Debug aid: for every entry of gid2out append its gid_ and its phase-1
// target ranks (targets_phase1[index + 2 ...], count in [index + 1]) to
// "debug.<rank>".
// NOTE(review): multisend_index_ is used without checking for -1 here,
// unlike phase2debug below -- confirm this is only called when every
// output PreSyn has a target list.
void phase1debug(int* targets_phase1) {
    std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
    std::ofstream f(fname, std::ios::app);
    f << std::endl << "phase1debug " << nrnmpi_myid;
    for (auto& g: gid2out) {
        PreSyn* ps = g.second;
        f << std::endl << " " << std::setw(2) << std::setfill('0') << ps->gid_ << ":";
        int* ranks = targets_phase1 + ps->multisend_index_;
        int n = ranks[1];
        ranks += 2;
        for (int i = 0; i < n; ++i) {
            f << " " << std::setw(2) << std::setfill('0') << ranks[i];
        }
    }
    f << std::endl;
}

// Debug aid: for every entry of gid2in append its gid and, when it has a
// phase-2 list (multisend_phase2_index_ >= 0), the phase-2 target ranks.
void phase2debug(int* targets_phase2) {
    std::string fname = std::string("debug.") + std::to_string(nrnmpi_myid);
    std::ofstream f(fname, std::ios::app);
    f << std::endl << "phase2debug " << nrnmpi_myid;
    for (auto& g: gid2in) {
        int gid = g.first;
        InputPreSyn* ps = g.second;
        f << std::endl << " " << std::setw(2) << std::setfill('0') << gid << ":";
        int j = ps->multisend_phase2_index_;
        if (j >= 0) {
            int* ranks = targets_phase2 + j;
            int cnt = ranks[0];
            ranks += 1;
            for (int i = 0; i < cnt; ++i) {
                f << " " << std::setw(2) << std::setfill('0') << ranks[i];
            }
        }
    }
    f << std::endl;
}
#endif

// Return the offsets for the counts in `acnt`: a vector one element longer
// than acnt with aoff[0] == 0 and aoff[i + 1] == acnt[0] + ... + acnt[i].
static std::vector newoffset(const std::vector& acnt) {
    std::vector aoff(acnt.size() + 1);
    aoff[0] = 0;
    std::partial_sum(acnt.begin(), acnt.end(), aoff.begin() + 1);
    return aoff;
}

// input: scnt, sdispl; output: rcnt, rdispl
// Exchanges one count per rank (send and receive both use counts of 1 and
// displacements 0, 1, 2, ...) to learn how many items each rank will send
// here, then derives the receive displacements from those counts.
static std::pair, std::vector> all2allv_helper(const std::vector& scnt) {
    int np = nrnmpi_numprocs;
    std::vector c(np, 1);
    std::vector rdispl = newoffset(c);
    std::vector rcnt(np, 0);
    nrnmpi_int_alltoallv(
        scnt.data(), c.data(), rdispl.data(), rcnt.data(), c.data(), rdispl.data());
    rdispl = newoffset(rcnt);
    return std::make_pair(std::move(rcnt), std::move(rdispl));
}

/*
  define following to 1 if desire space/performance information such as:
all2allv_int gidin to intermediate space=1552 total=37345104 time=0.000495835
all2allv_int gidout space=528 total=37379376 time=1.641e-05
all2allv_int lists space=3088 total=37351312 time=4.4708e-05
*/
#define all2allv_perf 0

// input: s, scnt, sdispl; output: r, rdispl
// Variable-length all-to-all of ints: obtains the receive counts and
// displacements from all2allv_helper, sizes the receive vector to
// rdispl[np] and performs the exchange. `dmes` only labels the debug /
// performance output.
static std::pair, std::vector> all2allv_int(const std::vector& s,
                                             const std::vector& scnt,
                                             const std::vector& sdispl,
                                             const char* dmes) {
#if all2allv_perf
    double tm = nrn_wtime();
#endif
    int np = nrnmpi_numprocs;

    std::vector rcnt;
    std::vector rdispl;
    std::tie(rcnt, rdispl) = all2allv_helper(scnt);
    std::vector r(rdispl[np], 0);
    nrnmpi_int_alltoallv(
        s.data(), scnt.data(), sdispl.data(), r.data(), rcnt.data(), rdispl.data());
    alltoalldebug(dmes, s, scnt, sdispl, r, rcnt, rdispl);

#if all2allv_perf
    if (nrnmpi_myid == 0) {
        int nb = 4 * nrnmpi_numprocs + sdispl[nrnmpi_numprocs] + rdispl[nrnmpi_numprocs];
        tm = nrn_wtime() - tm;
        printf("all2allv_int %s space=%d total=%g time=%g\n", dmes, nb, nrn_mallinfo(), tm);
    }
#endif
    return std::make_pair(std::move(r), std::move(rdispl));
}

// Target rank list for one gid, built on the intermediate rank.
class TarList {
  public:
    TarList();
    virtual ~TarList();
    virtual void alloc();
    int size;   // number of entries in list (number of groups once phase2organize ran)
    int* list;  // target ranks; allocated by alloc(), freed by the destructor
    int rank;   // initialised to -1

    int* indices;  // indices of list for groups of phase2 targets.
    // If indices is not null, then size is one less than
    // the size of the indices list where indices[size] = the size of
    // the list. Indices[0] is 0 and list[indices[i]] is the rank
    // to send the ith group of phase2 targets.
};

using Int2TarList = std::map;

TarList::TarList()
    : size(0)
    , list(nullptr)
    , rank(-1)
    , indices(nullptr) {}

TarList::~TarList() {
    delete[] list;
    delete[] indices;
}

// Allocate `list` with `size` entries; leaves it null when size is 0.
void TarList::alloc() {
    if (size) {
        list = new int[size];
    }
}

// for two phase

static nrnran123_State* ranstate{nullptr};

// Create the random stream (ids i, 0) unless one already exists.
static void random_init(int i) {
    if (!ranstate) {
        ranstate = nrnran123_newstream(i, 0);
    }
}

// Next value from the stream; random_init must have been called first.
static unsigned int get_random() {
    return nrnran123_ipick(ranstate);
}

// Avoid warnings if the global index is changed on subsequent psolve.
static void random_delete() {
    if (ranstate) {
        nrnran123_deletestream(ranstate);
        ranstate = nullptr;
    }
}

static int iran(int i1, int i2) {
    // discrete uniform random integer from i1 to i2 inclusive. Must
    // work if i1 == i2
    if (i1 == i2) {
        return i1;
    }
    int i3 = i1 + get_random() % (i2 - i1 + 1);
    return i3;
}

// Split a target list into about sqrt(size) groups for two-phase sending.
// When int(sqrt(size)) > 1: allocates indices (n + 1 entries, indices[n] =
// original size), sets size to the group count n, and swaps a randomly
// chosen member of each group to the front of that group. Lists for which
// int(sqrt(size)) <= 1 are left untouched.
static void phase2organize(TarList* tl) {
    int nt = tl->size;
    int n = int(sqrt(double(nt)));
    // change to about 20
    if (n > 1) {  // do not bother if not many connections
        // equal as possible group sizes
        tl->indices = new int[n + 1];
        tl->indices[n] = tl->size;
        tl->size = n;
        for (int i = 0; i < n; ++i) {
            tl->indices[i] = (i * nt) / n;
        }
        // Note: not sure the following is true anymore but it could be.
        // This distribution is very biased (if 0 is a phase1 target
        // it is always a phase2 sender. So now choose a random
        // target in the subset and make that the phase2 sender
        // (need to switch the indices[i] target and the one chosen)
        for (int i = 0; i < n; ++i) {
            int i1 = tl->indices[i];
            int i2 = tl->indices[i + 1] - 1;
            // need discrete uniform random integer from i1 to i2
            int i3 = iran(i1, i2);
            int itar = tl->list[i1];
            tl->list[i1] = tl->list[i3];
            tl->list[i3] = itar;
        }
    }
}

// end of twophase

/*
Setting up target lists uses a lot of temporary memory. It is conceivable
that this can be done prior to creating any cells or connections. I.e.
gid2out is presently known from pc.set_gid2node(gid,...).
Gid2in is presently known from NetCon = pc.gid_connect(gid, target) and
it is quite a style and hoc network programming change to use something
like pc.need_gid(gid) before cells with their synapses are created since one
would have to imagine that the hoc network setup code would have to be
executed in a virtual or 'abstract' fashion without actually creating, cells,
targets, or NetCons. Anyway, to potentially support this in the future, we
write setup_target_lists to not use any PreSyn information.
*/

// NOTE(review): `std::vector` appears below without template arguments; the
// angle-bracketed text looks stripped in this copy. Tokens are left as found.
static std::vector setup_target_lists(bool);
static void fill_multisend_lists(bool, const std::vector&, int*&, int*&);

// Build the multisend target arrays for this rank.
// Obtains the encoded target lists from setup_target_lists, marks every
// PreSyn in gid2out and every InputPreSyn in gid2in as having no list (-1),
// then lets fill_multisend_lists allocate targets_phase1 / targets_phase2
// with new[] and set the indices of the entries that do have lists.
void nrn_multisend_setup_targets(bool use_phase2, int*& targets_phase1, int*& targets_phase2) {
    auto r = setup_target_lists(use_phase2);

    // initialize as unused
    for (auto& g: gid2out) {
        PreSyn* ps = g.second;
        ps->multisend_index_ = -1;
    }

    // Only will be not -1 if non-nullptr input is a phase 2 sender.
    for (auto& g: gid2in) {
        InputPreSyn* ps = g.second;
        ps->multisend_phase2_index_ = -1;
    }

    fill_multisend_lists(use_phase2, r, targets_phase1, targets_phase2);

    // phase1debug(targets_phase1);
    // phase2debug(targets_phase2);
}

// Some notes about threads and the rank lists.
// Assume all MPI message sent and received from a single thread (0).
// gid2in and gid2out are rank wide lists for all threads
//
// Decode `r` in two passes. Pass 1 assigns each gid its start index
// (PreSyn::multisend_index_ or InputPreSyn::multisend_phase2_index_) and sums
// the space needed; pass 2, after allocating both arrays with new[], copies
// the lists:
//   targets_phase1: total, count, ranks...   (total == count without phase 2)
//   targets_phase2: count, ranks...
// A gid is treated as a phase-2 list when use_phase2 is set and the gid is in
// gid2in; otherwise it must be in gid2out (asserted).
static void fill_multisend_lists(bool use_phase2,
                                 const std::vector& r,
                                 int*& targets_phase1,
                                 int*& targets_phase2) {
    // sequence of gid, size, [totalsize], list
    // Note that totalsize is there only for output gid's and use_phase2.
    // Using this sequence, copy lists to proper phase
    // 1 and phase 2 lists. (Phase one lists found in gid2out_ and phase
    // two lists found in gid2in_.

    int phase1_index = 0;
    int phase2_index = 0;
    // Count and fill in multisend_index and multisend_phase2_index_
    // From the counts can allocate targets_phase1 and targets_phase2
    // Then can iterate again and copy r to proper target locations.
    for (std::size_t i = 0; i < r.size();) {
        InputPreSyn* ips = nullptr;
        int gid = r[i++];
        int size = r[i++];
        if (use_phase2) {  // look in gid2in first
            auto gid2in_it = gid2in.find(gid);
            if (gid2in_it != gid2in.end()) {  // phase 2 target list
                ips = gid2in_it->second;
                ips->multisend_phase2_index_ = phase2_index;
                phase2_index += 1 + size;  // count + ranks
                i += size;
            }
        }
        if (!ips) {  // phase 1 target list (or whole list if use_phase2 is 0)
            auto gid2out_it = gid2out.find(gid);
            assert(gid2out_it != gid2out.end());
            PreSyn* ps = gid2out_it->second;
            ps->multisend_index_ = phase1_index;
            phase1_index += 2 + size;  // total + count + ranks
            if (use_phase2) {
                i++;  // skip the totalsize entry in this counting pass
            }
            i += size;
        }
    }

    targets_phase1 = new int[phase1_index];
    targets_phase2 = new int[phase2_index];

    // printf("%d sz=%d\n", nrnmpi_myid, r.size());
    for (std::size_t i = 0; i < r.size();) {
        InputPreSyn* ips = nullptr;
        int gid = r[i++];
        int size = r[i++];
        if (use_phase2) {  // look in gid2in first
            auto gid2in_it = gid2in.find(gid);
            if (gid2in_it != gid2in.end()) {  // phase 2 target list
                ips = gid2in_it->second;
                int p = ips->multisend_phase2_index_;
                int* ranks = targets_phase2 + p;
                ranks[0] = size;
                ranks += 1;
                // printf("%d i=%d gid=%d phase2 size=%d\n", nrnmpi_myid, i, gid, size);
                for (int j = 0; j < size; ++j) {
                    ranks[j] = r[i++];
                    // printf("%d j=%d rank=%d\n", nrnmpi_myid, j, ranks[j]);
                    assert(ranks[j] != nrnmpi_myid);
                }
            }
        }
        if (!ips) {  // phase 1 target list (or whole list if use_phase2 is 0)
            auto gid2out_it = gid2out.find(gid);
            assert(gid2out_it != gid2out.end());
            PreSyn* ps = gid2out_it->second;
            int p = ps->multisend_index_;
            int* ranks = targets_phase1 + p;
            int total = size;
            if (use_phase2) {
                total = r[i++];
            }
            ranks[0] = total;
            ranks[1] = size;
            ranks += 2;
            // printf("%d i=%d gid=%d phase1 size=%d total=%d\n", nrnmpi_myid, i, gid, size, total);
            for (int j = 0; j < size; ++j) {
                ranks[j] = r[i++];
                // printf("%d j=%d rank=%d\n", nrnmpi_myid, j, ranks[j]);
                // There never was a possibility of send2self
                // because an output presyn is never in gid2in_.
                assert(ranks[j] != nrnmpi_myid);
            }
        }
    }

    // compute max_ntarget_host and max_multisend_targets
    // NOTE(review): these two are locals of this function and are not read
    // after the loops below, so the computed maxima are discarded on return.
    // Confirm whether they were meant to update file-level variables.
    int max_ntarget_host = 0;
    int max_multisend_targets = 0;
    for (auto& g: gid2out) {
        PreSyn* ps = g.second;
        if (ps->output_index_ >= 0) {  // only ones that generate spikes
            int i = ps->multisend_index_;
            if (i >= 0) {  // only if the gid has targets on other ranks.
                max_ntarget_host = std::max(targets_phase1[i], max_ntarget_host);
                max_multisend_targets = std::max(targets_phase1[i + 1], max_multisend_targets);
            }
        }
    }
    if (use_phase2) {
        for (auto& g: gid2in) {
            InputPreSyn* ps = g.second;
            int i = ps->multisend_phase2_index_;
            if (i >= 0) {
                max_multisend_targets = std::max(max_multisend_targets, targets_phase2[i]);
            }
        }
    }
}

// Return the vector encoding a sequence of gid, target list size, and target list
static std::vector setup_target_lists(bool use_phase2) {
    int nhost = nrnmpi_numprocs;

    // Construct hash table for finding the target rank list for a given gid.
    Int2TarList gid2tarlist;

    celldebug("output gid", gid2out);
    celldebug("input gid", gid2in);

    // What are the target ranks for a given input gid. All the ranks
    // with the same input gid send that gid to the intermediate
    // gid%nhost rank. The intermediate rank can then construct the
    // list of target ranks for the gids it gets.

    {
        // scnt1 is number of input gids from target
        std::vector scnt1(nhost, 0);
        for (const auto& g: gid2in) {
            int gid = g.first;
            ++scnt1[gid % nhost];
        }

        // s1 are the input gids from target to be sent to the various intermediates
        const std::vector sdispl1 = newoffset(scnt1);
        // Make an usable copy
        auto sdispl1_ = sdispl1;
        std::vector s1(sdispl1[nhost], 0);
        for (const auto& g: gid2in) {
            int gid = g.first;
            s1[sdispl1_[gid % nhost]++] = gid;
        }

        std::vector r1;
        std::vector rdispl1;
        std::tie(r1, rdispl1) = all2allv_int(s1, scnt1, sdispl1, "gidin to intermediate");
        // r1 is the gids received by this intermediate rank from all other ranks.
// Now figure out the size of the target list for each distinct gid in r1. for (const auto& gid: r1) { if (gid2tarlist.find(gid) == gid2tarlist.end()) { gid2tarlist[gid] = new TarList{}; gid2tarlist[gid]->size = 0; } auto tar = gid2tarlist[gid]; ++(tar->size); } // Conceptually, now the intermediate is the mpi source and the gid // sources are the mpi destination in regard to target lists. // It would be possible at this point, but confusing, // to allocate a s[rdispl1[nhost]] and figure out scnt and sdispl by // by getting the counts and gids from the ranks that own the source // gids. In this way we could organize s without having to allocate // individual target lists on the intermediate and then allocate // another large s buffer to receive a copy of them. However for // this processing we already require two large buffers for input // gid's so there is no real savings of space. // So let's do the simple obvious sequence and now complete the // target lists. // Allocate the target lists (and set size to 0 (we will recount when filling). for (const auto& g: gid2tarlist) { TarList* tl = g.second; tl->alloc(); tl->size = 0; } // fill the target lists for (int rank = 0; rank < nhost; ++rank) { int b = rdispl1[rank]; int e = rdispl1[rank + 1]; for (int i = b; i < e; ++i) { const auto itl_it = gid2tarlist.find(r1[i]); if (itl_it != gid2tarlist.end()) { TarList* tl = itl_it->second; tl->list[tl->size] = rank; tl->size++; } } } } { // Now the intermediate hosts have complete target lists and // the sources know the intermediate host from the gid2out_ map. // We could potentially organize here for two-phase exchange as well. // Which target lists are desired by the source rank? // Ironically, for round robin distributions, the target lists are // already on the proper source rank so the following code should // be tested for random distributions of gids. // How many on the source rank? 
std::vector scnt2(nhost, 0); for (auto& g: gid2out) { int gid = g.first; PreSyn* ps = g.second; if (ps->output_index_ >= 0) { // only ones that generate spikes ++scnt2[gid % nhost]; } } const auto sdispl2 = newoffset(scnt2); auto sdispl2_ = sdispl2; // what are the gids of those target lists std::vector s2(sdispl2[nhost], 0); for (auto& g: gid2out) { int gid = g.first; PreSyn* ps = g.second; if (ps->output_index_ >= 0) { // only ones that generate spikes s2[sdispl2_[gid % nhost]++] = gid; } } std::vector r2; std::vector rdispl2; std::tie(r2, rdispl2) = all2allv_int(s2, scnt2, sdispl2, "gidout"); // fill in the tl->rank for phase 1 target lists // r2 is an array of source spiking gids // tl is list associating input gids with list of target ranks. for (int rank = 0; rank < nhost; ++rank) { int b = rdispl2[rank]; int e = rdispl2[rank + 1]; for (int i = b; i < e; ++i) { // note that there may be input gids with no corresponding // output gid so that the find may not return true and in // that case the tl->rank remains -1. // For example multisplit gids or simulation of a subset of // cells. const auto itl_it = gid2tarlist.find(r2[i]); if (itl_it != gid2tarlist.end()) { TarList* tl = itl_it->second; tl->rank = rank; } } } } if (use_phase2) { random_init(nrnmpi_myid + 1); for (const auto& gid2tar: gid2tarlist) { TarList* tl = gid2tar.second; if (tl->rank >= 0) { // only if output gid is spike generating phase2organize(tl); } } random_delete(); } // For clarity, use the all2allv_int style of information flow // from source to destination as above // and also use a uniform code // for copying one and two phase information from a TarList to // develop the s, scnt, and sdispl3 buffers. 
That is, a buffer list // section in s for either a one-phase list or the much shorter // (individually) lists for first and second phases, has a // gid, size, totalsize header for each list where totalsize // is only present if the gid is an output gid (for // NrnMultisend_Send.ntarget_host used for conservation). // Note that totalsize is tl->indices[tl->size] // how much to send to each rank std::vector scnt3(nhost, 0); for (const auto& gid2tar: gid2tarlist) { TarList* tl = gid2tar.second; if (tl->rank < 0) { // When the output gid does not generate spikes, that rank // is not interested if there is a target list for it. // If the output gid does not exist, there is no rank. // In either case ignore this target list. continue; } if (tl->indices) { // indices[size] is the size of list but size of those // are the sublist phase 2 destination ranks which // don't get sent as part of the phase 2 target list. // Also there is a phase 1 target list of size so there // are altogether size+1 target lists. 
// (one phase 1 list and size phase 2 lists) scnt3[tl->rank] += tl->size + 2; // gid, size, list for (int i = 0; i < tl->size; ++i) { scnt3[tl->list[tl->indices[i]]] += tl->indices[i + 1] - tl->indices[i] + 1; // gid, size, list } } else { // gid, list size, list scnt3[tl->rank] += tl->size + 2; } if (use_phase2) { // The phase 1 header has as its third element, the // total list size (needed for conservation); scnt3[tl->rank] += 1; } } const auto sdispl4 = newoffset(scnt3); auto sdispl4_ = sdispl4; std::vector s3(sdispl4[nhost], 0); // what to send to each rank for (const auto& gid2tar: gid2tarlist) { int gid = gid2tar.first; TarList* tl = gid2tar.second; if (tl->rank < 0) { continue; } if (tl->indices) { s3[sdispl4_[tl->rank]++] = gid; s3[sdispl4_[tl->rank]++] = tl->size; if (use_phase2) { s3[sdispl4_[tl->rank]++] = tl->indices[tl->size]; } for (int i = 0; i < tl->size; ++i) { s3[sdispl4_[tl->rank]++] = tl->list[tl->indices[i]]; } for (int i = 0; i < tl->size; ++i) { int rank = tl->list[tl->indices[i]]; s3[sdispl4_[rank]++] = gid; assert(tl->indices[i + 1] > tl->indices[i]); s3[sdispl4_[rank]++] = tl->indices[i + 1] - tl->indices[i] - 1; for (int j = tl->indices[i] + 1; j < tl->indices[i + 1]; ++j) { s3[sdispl4_[rank]++] = tl->list[j]; } } } else { // gid, list size, list s3[sdispl4_[tl->rank]++] = gid; s3[sdispl4_[tl->rank]++] = tl->size; if (use_phase2) { s3[sdispl4_[tl->rank]++] = tl->size; } for (int i = 0; i < tl->size; ++i) { s3[sdispl4_[tl->rank]++] = tl->list[i]; } } delete tl; } std::vector r_return; std::vector rdispl3; std::tie(r_return, rdispl3) = all2allv_int(s3, scnt3, sdispl4, "lists"); return r_return; } } // namespace coreneuron #endif // NRN_MULTISEND ================================================ FILE: coreneuron/network/netcon.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file 
for details. # =============================================================================. */ #pragma once #include "coreneuron/mpi/nrnmpi.h" #undef check #if MAC #define NetCon nrniv_Dinfo #endif namespace coreneuron { class PreSyn; class InputPreSyn; class TQItem; struct NrnThread; struct Point_process; class NetCvode; #define DiscreteEventType 0 #define TstopEventType 1 #define NetConType 2 #define SelfEventType 3 #define PreSynType 4 #define NetParEventType 7 #define InputPreSynType 20 struct DiscreteEvent { DiscreteEvent() = default; virtual ~DiscreteEvent() = default; virtual void send(double deliverytime, NetCvode*, NrnThread*); virtual void deliver(double t, NetCvode*, NrnThread*); virtual int type() const { return DiscreteEventType; } virtual bool require_checkpoint() { return true; } virtual void pr(const char*, double t, NetCvode*); }; class NetCon: public DiscreteEvent { public: bool active_{}; double delay_{1.0}; Point_process* target_{}; union { int weight_index_{}; int srcgid_; // only to help InputPreSyn during setup // before weights are read and stored. Saves on transient // memory requirements by avoiding storage of all group file // netcon_srcgid lists. ie. that info is copied into here. 
} u; NetCon() = default; virtual ~NetCon() = default; virtual void send(double sendtime, NetCvode*, NrnThread*) override; virtual void deliver(double, NetCvode* ns, NrnThread*) override; virtual int type() const override { return NetConType; } virtual void pr(const char*, double t, NetCvode*) override; }; class SelfEvent: public DiscreteEvent { public: double flag_; Point_process* target_; void** movable_; // actually a TQItem** int weight_index_; SelfEvent() = default; virtual ~SelfEvent() = default; virtual void deliver(double, NetCvode*, NrnThread*) override; virtual int type() const override { return SelfEventType; } virtual void pr(const char*, double t, NetCvode*) override; private: void call_net_receive(NetCvode*); }; class ConditionEvent: public DiscreteEvent { public: // condition detection factored out of PreSyn for re-use ConditionEvent() = default; virtual ~ConditionEvent() = default; virtual bool check(NrnThread*); virtual double value(NrnThread*) { return -1.; } int flag_{}; // true when below, false when above. 
(changed from bool to int to avoid cray acc // bug(?)) }; class PreSyn: public ConditionEvent { public: #if NRNMPI unsigned char localgid_{}; // compressed gid for spike transfer #endif int nc_index_{}; // replaces dil_, index into global NetCon** netcon_in_presyn_order_ int nc_cnt_{}; // how many netcon starting at nc_index_ int output_index_{}; int gid_{-1}; double threshold_{10.}; int thvar_index_{-1}; // >=0 points into NrnThread._actual_v Point_process* pntsrc_{}; PreSyn() = default; virtual ~PreSyn() = default; virtual void send(double sendtime, NetCvode*, NrnThread*) override; virtual void deliver(double, NetCvode*, NrnThread*) override; virtual int type() const override { return PreSynType; } virtual double value(NrnThread*) override; void record(double t); #if NRN_MULTISEND int multisend_index_{-1}; #endif }; class InputPreSyn: public DiscreteEvent { public: int nc_index_{-1}; // replaces dil_, index into global NetCon** netcon_in_presyn_order_ int nc_cnt_{}; // how many netcon starting at nc_index_ InputPreSyn() = default; virtual ~InputPreSyn() = default; virtual void send(double sendtime, NetCvode*, NrnThread*) override; virtual void deliver(double, NetCvode*, NrnThread*) override; virtual int type() const override { return InputPreSynType; } #if NRN_MULTISEND int multisend_phase2_index_{-1}; #endif }; class NetParEvent: public DiscreteEvent { public: int ithread_; // for pr() double wx_, ws_; // exchange time and "spikes to Presyn" time NetParEvent(); virtual ~NetParEvent() = default; virtual void send(double, NetCvode*, NrnThread*) override; virtual void deliver(double, NetCvode*, NrnThread*) override; virtual int type() const override { return NetParEventType; } virtual void pr(const char*, double t, NetCvode*) override; }; } // namespace coreneuron ================================================ FILE: coreneuron/network/netcvode.cpp ================================================ /* # 
============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include #include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/netcon.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/network/netpar.hpp" #include "coreneuron/utils/ivocvect.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/io/output_spikes.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/network/multisend.hpp" #include "coreneuron/mechanism/membfunc.hpp" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" namespace coreneuron { #define PP2NT(pp) (nrn_threads + (pp)->_tid) #define PP2t(pp) (PP2NT(pp)->_t) //#define POINT_RECEIVE(type, tar, w, f) (*pnt_receive[type])(tar, w, f) double NetCvode::eps_; NetCvode* net_cvode_instance; bool cvode_active_; /// Flag to use the bin queue bool nrn_use_bin_queue_ = 0; void mk_netcvode() { if (!net_cvode_instance) { net_cvode_instance = new NetCvode(); } } #ifdef DEBUG // temporary static int nrn_errno_check(int type) { printf("nrn_errno_check() was called on pid %d: errno=%d type=%d\n", nrnmpi_myid, errno, type); // assert(0); type = 0; return 1; } #endif // for _OPENACC and/or NET_RECEIVE_BUFFERING // sem 0:3 send event move void net_sem_from_gpu(int sendtype, int i_vdata, int weight_index_, int ith, int ipnt, double td, double flag) { NrnThread& nt = nrn_threads[ith]; Point_process* pnt = (Point_process*) nt._vdata[ipnt]; if (sendtype == 0) { net_send(nt._vdata + i_vdata, weight_index_, pnt, td, flag); } else if (sendtype == 2) { net_move(nt._vdata + i_vdata, pnt, td); } else { net_event(pnt, td); } } void net_send(void** v, 
int weight_index_, Point_process* pnt, double td, double flag) { NrnThread* nt = PP2NT(pnt); NetCvodeThreadData& p = net_cvode_instance->p[nt->id]; SelfEvent* se = new SelfEvent; se->flag_ = flag; se->target_ = pnt; se->weight_index_ = weight_index_; if (v >= nt->_vdata) { se->movable_ = v; // needed for SaveState } assert(net_cvode_instance); ++p.unreffed_event_cnt_; if (td < nt->_t) { char buf[100]; sprintf(buf, "net_send td-t = %g", td - nt->_t); se->pr(buf, td, net_cvode_instance); abort(); hoc_execerror("net_send delay < 0", 0); } TQItem* q = net_cvode_instance->event(td, se, nt); if (flag == 1.0 && v >= nt->_vdata) { *v = (void*) q; } // printf("net_send %g %s %g %p\n", td, pnt_name(pnt), flag, *v); } void artcell_net_send(void** v, int weight_index_, Point_process* pnt, double td, double flag) { net_send(v, weight_index_, pnt, td, flag); } void net_event(Point_process* pnt, double time) { NrnThread* nt = PP2NT(pnt); PreSyn* ps = nt->presyns + nt->pnt2presyn_ix[corenrn.get_pnttype2presyn()[pnt->_type]][pnt->_i_instance]; if (ps) { if (time < nt->_t) { char buf[100]; sprintf(buf, "net_event time-t = %g", time - nt->_t); ps->pr(buf, time, net_cvode_instance); hoc_execerror("net_event time < t", 0); } ps->send(time, net_cvode_instance, nt); } } NetCvodeThreadData::NetCvodeThreadData() : tqe_{new TQueue()} { inter_thread_events_.reserve(1000); } NetCvodeThreadData::~NetCvodeThreadData() { delete tqe_; } /// If the PreSyn is on a different thread than the target, /// we have to lock the buffer void NetCvodeThreadData::interthread_send(double td, DiscreteEvent* db, NrnThread* /* nt */) { std::lock_guard lock(mut); inter_thread_events_.emplace_back(InterThreadEvent{db, td}); } void interthread_enqueue(NrnThread* nt) { net_cvode_instance->p[nt->id].enqueue(net_cvode_instance, nt); } void NetCvodeThreadData::enqueue(NetCvode* nc, NrnThread* nt) { std::lock_guard lock(mut); for (const auto& ite: inter_thread_events_) { nc->bin_event(ite.t_, ite.de_, nt); } 
inter_thread_events_.clear(); } NetCvode::NetCvode() { eps_ = 100. * DBL_EPSILON; #if PRINT_EVENT print_event_ = 1; #else print_event_ = 0; #endif pcnt_ = 0; p = nullptr; p_construct(1); // eventually these should not have to be thread safe // for parallel network simulations hardly any presyns have // a threshold and it can be very inefficient to check the entire // presyn list for thresholds during the fixed step method. // So keep a threshold list. } NetCvode::~NetCvode() { if (net_cvode_instance == this) { net_cvode_instance = nullptr; } p_construct(0); } void nrn_p_construct() { net_cvode_instance->p_construct(nrn_nthread); } void NetCvode::p_construct(int n) { if (pcnt_ != n) { if (p) { delete[] p; p = nullptr; } if (n > 0) p = new NetCvodeThreadData[n]; else p = nullptr; pcnt_ = n; } for (int i = 0; i < n; ++i) p[i].unreffed_event_cnt_ = 0; } TQItem* NetCvode::bin_event(double td, DiscreteEvent* db, NrnThread* nt) { if (nrn_use_bin_queue_) { #if PRINT_EVENT if (print_event_) { db->pr("binq send", td, this); } #endif return p[nt->id].tqe_->enqueue_bin(td, db); } else { #if PRINT_EVENT if (print_event_) { db->pr("send", td, this); } #endif return p[nt->id].tqe_->insert(td, db); } } TQItem* NetCvode::event(double td, DiscreteEvent* db, NrnThread* nt) { #if PRINT_EVENT if (print_event_) { db->pr("send", td, this); } #endif return p[nt->id].tqe_->insert(td, db); } void NetCvode::clear_events() { // DiscreteEvents may already have gone out of existence so the tqe_ // may contain many invalid item data pointers enqueueing_ = 0; for (int i = 0; i < nrn_nthread; ++i) { NetCvodeThreadData& d = p[i]; delete d.tqe_; d.tqe_ = new TQueue(); d.unreffed_event_cnt_ = 0; d.inter_thread_events_.clear(); d.tqe_->nshift_ = -1; d.tqe_->shift_bin(nrn_threads->_t - 0.5 * nrn_threads->_dt); } } void NetCvode::init_events() { for (int i = 0; i < nrn_nthread; ++i) { p[i].tqe_->nshift_ = -1; p[i].tqe_->shift_bin(nrn_threads->_t - 0.5 * nrn_threads->_dt); } for (int tid = 0; tid < 
nrn_nthread; ++tid) { // can be done in parallel NrnThread* nt = nrn_threads + tid; for (int ipre = 0; ipre < nt->n_presyn; ++ipre) { PreSyn* ps = nt->presyns + ipre; ps->flag_ = false; } for (int inetc = 0; inetc < nt->n_netcon; ++inetc) { NetCon* d = nt->netcons + inetc; if (d->target_) { int type = d->target_->_type; if (corenrn.get_pnt_receive_init()[type]) { (*corenrn.get_pnt_receive_init()[type])(d->target_, d->u.weight_index_, 0); } else { int cnt = corenrn.get_pnt_receive_size()[type]; double* wt = nt->weights + d->u.weight_index_; // not the first for (int j = 1; j < cnt; ++j) { wt[j] = 0.; } } } } } } bool NetCvode::deliver_event(double til, NrnThread* nt) { TQItem* q = p[nt->id].tqe_->atomic_dq(til); if (q == nullptr) { return false; } DiscreteEvent* de = q->data_; double tt = q->t_; delete q; #if PRINT_EVENT if (print_event_) { de->pr("deliver", tt, this); } #endif de->deliver(tt, this, nt); /// In case of a self event we need to delete the self event if (de->type() == SelfEventType) { delete static_cast(de); } return true; } void net_move(void** v, Point_process* pnt, double tt) { // assert, if possible that *v == pnt->movable. 
if (!(*v)) hoc_execerror("No event with flag=1 for net_move in ", corenrn.get_memb_func(pnt->_type).sym); TQItem* q = (TQItem*) (*v); // printf("net_move tt=%g %s *v=%p\n", tt, memb_func[pnt->_type].sym, *v); if (tt < PP2t(pnt)) nrn_assert(0); net_cvode_instance->move_event(q, tt, PP2NT(pnt)); } void artcell_net_move(void** v, Point_process* pnt, double tt) { net_move(v, pnt, tt); } void NetCvode::move_event(TQItem* q, double tnew, NrnThread* nt) { int tid = nt->id; #if PRINT_EVENT if (print_event_) { SelfEvent* se = (SelfEvent*) q->data_; printf("NetCvode::move_event self event target %s t=%g, old=%g new=%g\n", corenrn.get_memb_func(se->target_->_type).sym, nt->_t, q->t_, tnew); } #endif p[tid].tqe_->move(q, tnew); } void NetCvode::deliver_events(double til, NrnThread* nt) { // printf("deliver_events til %20.15g\n", til); /// Enqueue any outstanding events in the interthread event buffer p[nt->id].enqueue(this, nt); /// Deliver events. When the map is used, the loop is explicit while (deliver_event(til, nt)) ; } void PreSyn::record(double tt) { spikevec_lock(); if (gid_ > -1) { spikevec_gid.push_back(gid_); spikevec_time.push_back(tt); } spikevec_unlock(); } bool ConditionEvent::check(NrnThread* nt) { if (value(nt) > 0.0) { if (flag_ == false) { flag_ = true; return true; } } else { flag_ = false; } return false; } void DiscreteEvent::send(double tt, NetCvode* ns, NrnThread* nt) { ns->event(tt, this, nt); } void DiscreteEvent::deliver(double /* tt */, NetCvode* /* ns */, NrnThread* /* nt */) {} void DiscreteEvent::pr(const char* s, double tt, NetCvode* /* ns */) { printf("%s DiscreteEvent %.15g\n", s, tt); } void NetCon::send(double tt, NetCvode* ns, NrnThread* nt) { if (active_ && target_) { nrn_assert(PP2NT(target_) == nt); ns->bin_event(tt, this, PP2NT(target_)); } } void NetCon::deliver(double tt, NetCvode* /* ns */, NrnThread* nt) { nrn_assert(target_); if (PP2NT(target_) != nt) printf("NetCon::deliver nt=%d target=%d\n", nt->id, PP2NT(target_)->id); 
nrn_assert(PP2NT(target_) == nt); int typ = target_->_type; nt->_t = tt; // printf("NetCon::deliver t=%g tt=%g %s\n", t, tt, pnt_name(target_)); std::string ss("net-receive-"); ss += nrn_get_mechname(typ); Instrumentor::phase p_get_pnt_receive(ss.c_str()); (*corenrn.get_pnt_receive()[typ])(target_, u.weight_index_, 0); #ifdef DEBUG if (errno && nrn_errno_check(typ)) hoc_warning("errno set during NetCon deliver to NET_RECEIVE", (char*) 0); #endif } void NetCon::pr(const char* s, double tt, NetCvode* /* ns */) { Point_process* pp = target_; printf("%s NetCon target=%s[%d] %.15g\n", s, corenrn.get_memb_func(pp->_type).sym, pp->_i_instance, tt); } void PreSyn::send(double tt, NetCvode* ns, NrnThread* nt) { record(tt); for (int i = nc_cnt_ - 1; i >= 0; --i) { NetCon* d = netcon_in_presyn_order_[nc_index_ + i]; if (d->active_ && d->target_) { NrnThread* n = PP2NT(d->target_); if (nt == n) ns->bin_event(tt + d->delay_, d, n); else ns->p[n->id].interthread_send(tt + d->delay_, d, n); } } #if NRNMPI if (output_index_ >= 0) { #if NRN_MULTISEND if (use_multisend_) { nrn_multisend_send(this, tt, nt); } else { #else { #endif if (nrn_use_localgid_) { nrn_outputevent(localgid_, tt); } else { nrn2ncs_outputevent(output_index_, tt); } } } #endif // NRNMPI } void InputPreSyn::send(double tt, NetCvode* ns, NrnThread* nt) { for (int i = nc_cnt_ - 1; i >= 0; --i) { NetCon* d = netcon_in_presyn_order_[nc_index_ + i]; if (d->active_ && d->target_) { NrnThread* n = PP2NT(d->target_); if (nt == n) ns->bin_event(tt + d->delay_, d, n); else ns->p[n->id].interthread_send(tt + d->delay_, d, n); } } } void PreSyn::deliver(double, NetCvode*, NrnThread*) { assert(0); // no PreSyn delay. } void InputPreSyn::deliver(double, NetCvode*, NrnThread*) { assert(0); // no InputPreSyn delay. 
} void SelfEvent::deliver(double tt, NetCvode* ns, NrnThread* nt) { nrn_assert(nt == PP2NT(target_)); PP2t(target_) = tt; // printf("SelfEvent::deliver t=%g tt=%g %s\n", PP2t(target_), tt, pnt_name(target_)); call_net_receive(ns); } void SelfEvent::call_net_receive(NetCvode* ns) { (*corenrn.get_pnt_receive()[target_->_type])(target_, weight_index_, flag_); #ifdef DEBUG if (errno && nrn_errno_check(target_->_type)) hoc_warning("errno set during SelfEvent deliver to NET_RECEIVE", (char*) 0); #endif NetCvodeThreadData& nctd = ns->p[PP2NT(target_)->id]; --nctd.unreffed_event_cnt_; } void SelfEvent::pr(const char* s, double tt, NetCvode*) { printf("%s", s); printf(" SelfEvent target=%s %.15g flag=%g\n", pnt_name(target_), tt, flag_); } void ncs2nrn_integrate(double tstop) { int total_sim_steps = static_cast((tstop - nrn_threads->_t) / dt + 1e-9); if (total_sim_steps > 3 && !nrn_have_gaps) { nrn_fixed_step_group_minimal(total_sim_steps); } else { nrn_fixed_single_steps_minimal(total_sim_steps, tstop); } // handle all the pending flag=1 self events for (int i = 0; i < nrn_nthread; ++i) nrn_assert(nrn_threads[i]._t == nrn_threads->_t); } // factored this out from deliver_net_events so we can // stay in the cache // net_send_buffer added so checking can be done on gpu // while event queueing is on cpu. 
// Remember: passing reference variable causes cray
// compiler bug

/// Threshold test with hysteresis: returns true only when `var` is above
/// `thresh` and `*flag` was clear; `*flag` is set while above and cleared
/// while not above.
static bool pscheck(double var, double thresh, int* flag) {
    if (var > thresh) {
        if (*flag == false) {
            *flag = true;
            return true;
        }
    } else {
        *flag = false;
    }
    return false;
}

/// Voltage at thvar_index_ minus this PreSyn's threshold.
double PreSyn::value(NrnThread* nt) {
    return nt->_actual_v[thvar_index_] - threshold_;
}

/// Test the first n_real_output PreSyns of the thread against their
/// thresholds, collect the indices of those that fired in
/// nt->_net_send_buffer, send them, then run the WATCH checks.
void NetCvode::check_thresh(NrnThread* nt) {  // for default method
    Instrumentor::phase p("check-threshold");
    // Firing PreSyns are sent at nt->_t + teps.
    double teps = 1e-10;

    nt->_net_send_buffer_cnt = 0;
    int net_send_buf_count = 0;
    PreSyn* presyns = nt->presyns;
    PreSynHelper* presyns_helper = nt->presyns_helper;
    double* actual_v = nt->_actual_v;

    if (nt->ncell == 0)
        return;

    nrn_pragma_acc(parallel loop present(
        nt [0:1], presyns_helper [0:nt->n_presyn], presyns [0:nt->n_presyn], actual_v [0:nt->end])
                       copy(net_send_buf_count) if (nt->compute_gpu) async(nt->stream_id))
    nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count) if(nt->compute_gpu))
    for (int i = 0; i < nt->n_real_output; ++i) {
        PreSyn* ps = presyns + i;
        PreSynHelper* psh = presyns_helper + i;
        int idx = 0;
        int thidx = ps->thvar_index_;
        double v = actual_v[thidx];
        double threshold = ps->threshold_;
        int* flag = &(psh->flag_);

        if (pscheck(v, threshold, flag)) {
#ifndef CORENEURON_ENABLE_GPU
            // Builds without CORENEURON_ENABLE_GPU: double the send buffer
            // when the current count has reached its size.
            nt->_net_send_buffer_cnt = net_send_buf_count;
            if (nt->_net_send_buffer_cnt >= nt->_net_send_buffer_size) {
                nt->_net_send_buffer_size *= 2;
                nt->_net_send_buffer = (int*) erealloc(nt->_net_send_buffer,
                                                       nt->_net_send_buffer_size * sizeof(int));
            }
#endif

            // Reserve a slot index atomically, then store the PreSyn index.
            nrn_pragma_acc(atomic capture)
            nrn_pragma_omp(atomic capture)
            idx = net_send_buf_count++;

            nt->_net_send_buffer[idx] = i;
        }
    }
    nrn_pragma_acc(wait(nt->stream_id))
    nt->_net_send_buffer_cnt = net_send_buf_count;

    if (nt->compute_gpu && nt->_net_send_buffer_cnt) {
#ifdef CORENEURON_ENABLE_GPU
        int* nsbuffer = nt->_net_send_buffer;
#endif
        nrn_pragma_acc(update host(nsbuffer [0:nt->_net_send_buffer_cnt]) async(nt->stream_id))
        nrn_pragma_acc(wait(nt->stream_id))
        nrn_pragma_omp(target update from(nsbuffer [0:nt->_net_send_buffer_cnt]))
    }

    // on CPU...
    for (int i = 0; i < nt->_net_send_buffer_cnt; ++i) {
        PreSyn* ps = nt->presyns + nt->_net_send_buffer[i];
        ps->send(nt->_t + teps, net_cvode_instance, nt);
    }

    // Types that have WATCH statements. If exist, then last element is 0.
    if (nt->_watch_types) {
        for (int i = 0; nt->_watch_types[i] != 0; ++i) {
            int type = nt->_watch_types[i];
            (*corenrn.get_watch_check()[type])(nt, nt->_ml_list[type]);
            // may generate net_send events (with 0 (teps) delay)
        }
    }
}

// WATCH statements are rare. Conceptually they are very similar to
// PreSyn thresholds as above but an optimal performance implementation for GPU is
// not obvious. Each WATCH statement threshold test could make use of
// pscheck. Note that it is possible that there are several active WATCH
// statements for a given POINT_PROCESS instance as well as none active.
// Also WATCH statements switch between active and inactive state.
//
// In NEURON,
// both PreSyn and WatchCondition were subclasses of ConditionEvent. When
// a WatchCondition fired in the fixed step method, it was placed on the queue
// with a delivery time of t+teps. WatchCondition::deliver called the NET_RECEIVE
// block with proper flag ( but nullptr weight vector). WatchConditions
// were created,added/removed,destroyed from a list as necessary.
// Perhaps the most commonly used WATCH statement is in the context of a
// ThresholdDetect Point_process which watches voltage and compares to
// an instance specific threshold parameter. A firing ThresholdDetect instance
// would call net_event(tdeliver) which then feeds into the standard
// artcell PreSyn sequence (using pntsrc_ instead of thvar_index_).
//
// So... the PreSyns have the same order as they are checked (although PreSyn
// data is AoS instead of SoA and nested 'if' means a failure of SIMD.)
// But if multiple WATCH, there is (from one kind of implementation viewpoint), // yet another 'if' with regard to whether a WATCH is active. And if there // are multiple WATCH, the size of the list is dynamic. // // An experimental implementation is to check all WATCH of all instances // of a type with the proviso that there is an active flag for each WATCH. // ie. active, below, var1, var2 are all SoA (except one of the var may // be voltage). Can use 'if (active && pscheck(var1, var2, &below)' // The mod file net_send_buffering fragments can be used which // ultimately call net_send using a transient SelfEvent. ie. all // checking computation takes place in the context of the mod file without // using explicit WatchCondition instances. // events including binqueue events up to t+dt/2 void NetCvode::deliver_net_events(NrnThread* nt) { // for default method #if NRN_MULTISEND if (use_multisend_ && nt->id == 0) { nrn_multisend_advance(); } #endif int tid = nt->id; double tsav = nt->_t; double tm = nt->_t + 0.5 * nt->_dt; tryagain: // one of the events on the main queue may be a NetParEvent // which due to dt round off error can result in an event // placed on the bin queue to be delivered now, which // can put 0 delay events on to the main queue. So loop til // no events. The alternative would be to deliver an idt=0 event // immediately but that would very much change the sequence // with respect to what is being done here and it is unclear // how to fix the value of t there. This can be a do while loop // but I do not want to affect the case of not using a bin queue. 
if (nrn_use_bin_queue_) { TQItem* q; while ((q = p[tid].tqe_->dequeue_bin()) != 0) { DiscreteEvent* db = q->data_; #if PRINT_EVENT if (print_event_) { db->pr("binq deliver", nrn_threads->_t, this); } #endif delete q; db->deliver(nt->_t, this, nt); } // assert(int(tm/nt->_dt)%1000 == p[tid].tqe_->nshift_); } deliver_events(tm, nt); if (nrn_use_bin_queue_) { if (p[tid].tqe_->top()) { goto tryagain; } p[tid].tqe_->shift_bin(tm); } nt->_t = tsav; /*before executing on gpu, we have to update the NetReceiveBuffer_t on GPU */ update_net_receive_buffer(nt); for (auto& net_buf_receive: corenrn.get_net_buf_receive()) { std::string ss("net-buf-receive-"); ss += nrn_get_mechname(net_buf_receive.second); Instrumentor::phase p_net_buf_receive(ss.c_str()); (*net_buf_receive.first)(nt); } } } // namespace coreneuron ================================================ FILE: coreneuron/network/netcvode.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include "coreneuron/utils/nrnmutdec.hpp" #include "coreneuron/network/tqueue.hpp" #define PRINT_EVENT 0 /** QTYPE options include: spltree, pq_que * STL priority queue is used instead of the splay tree by default. * @todo: check if stl queue works with move_event functions. 
*/ #ifdef ENABLE_SPLAYTREE_QUEUING #define QTYPE spltree #else #define QTYPE pq_que #endif namespace coreneuron { // defined in coreneuron/network/cvodestb.cpp extern void init_net_events(void); extern void nrn_play_init(void); extern void deliver_net_events(NrnThread*); extern void nrn_deliver_events(NrnThread*); extern void fixed_play_continuous(NrnThread*); struct DiscreteEvent; class NetCvode; extern NetCvode* net_cvode_instance; extern void interthread_enqueue(NrnThread*); struct InterThreadEvent { DiscreteEvent* de_; double t_; }; class NetCvodeThreadData { public: int unreffed_event_cnt_ = 0; TQueue* tqe_; std::vector inter_thread_events_; OMP_Mutex mut; NetCvodeThreadData(); virtual ~NetCvodeThreadData(); void interthread_send(double, DiscreteEvent*, NrnThread*); void enqueue(NetCvode*, NrnThread*); }; class NetCvode { public: int print_event_; int pcnt_; int enqueueing_; NetCvodeThreadData* p; static double eps_; NetCvode(void); virtual ~NetCvode(); void p_construct(int); void check_thresh(NrnThread*); static double eps(double x) { return eps_ * fabs(x); } TQItem* event(double tdeliver, DiscreteEvent*, NrnThread*); void move_event(TQItem*, double, NrnThread*); TQItem* bin_event(double tdeliver, DiscreteEvent*, NrnThread*); void deliver_net_events(NrnThread*); // for default staggered time step method void deliver_events(double til, NrnThread*); // for initialization events bool deliver_event(double til, NrnThread*); // uses TQueue atomically void clear_events(); void init_events(); void point_receive(int, Point_process*, double*, double); }; } // namespace coreneuron ================================================ FILE: coreneuron/network/netpar.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include #include #include #include #include "coreneuron/nrnconf.h" #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpidec.h" #include "coreneuron/network/netcon.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/utils/ivocvect.hpp" #include "coreneuron/network/multisend.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/utils/utils.hpp" #if NRNMPI #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" int localgid_size_; int ag_send_nspike; namespace coreneuron { int* nrnmpi_nin_; } int ovfl_capacity; int icapacity; unsigned char* spikeout_fixed; unsigned char* spfixin_ovfl_; unsigned char* spikein_fixed; int ag_send_size; int ovfl; int nout; coreneuron::NRNMPI_Spikebuf* spbufout; coreneuron::NRNMPI_Spikebuf* spbufin; #endif namespace coreneuron { class PreSyn; class InputPreSyn; void nrn_spike_exchange_init(); #if NRNMPI static double t_exchange_; static double dt1_; // 1/dt NRNMPI_Spike* spikeout; NRNMPI_Spike* spikein; void nrn_timeout(int); void nrn_spike_exchange(NrnThread*); void nrn2ncs_outputevent(int netcon_output_index, double firetime); // for compressed gid info during spike exchange bool nrn_use_localgid_; void nrn_outputevent(unsigned char localgid, double firetime); std::vector> localmaps; static int ocapacity_; // for spikeout // require it to be smaller than min_interprocessor_delay. static double wt_; // wait time for nrnmpi_spike_exchange static double wt1_; // time to find the PreSyns and send the spikes. static bool use_compress_; static int spfixout_capacity_; static int idxout_; static void nrn_spike_exchange_compressed(NrnThread*); #endif // NRNMPI static bool active_ = false; static double usable_mindelay_; static double mindelay_; // the one actually used. 
Some of our optional algorithms static double last_maxstep_arg_; static std::vector npe_; // nrn_nthread of them #if NRNMPI // for combination of threads and mpi. static OMP_Mutex mut; #endif /// Allocate space for spikes: 200 structs of {int gid; double time} /// coming from nrnmpi.h and array of int of the global domain size static void alloc_mpi_space() { #if NRNMPI if (corenrn_param.mpi_enable && !spikeout) { ocapacity_ = 100; spikeout = (NRNMPI_Spike*) emalloc(ocapacity_ * sizeof(NRNMPI_Spike)); icapacity = 100; spikein = (NRNMPI_Spike*) malloc(icapacity * sizeof(NRNMPI_Spike)); nrnmpi_nin_ = (int*) emalloc(nrnmpi_numprocs * sizeof(int)); #if nrn_spikebuf_size > 0 spbufout = (NRNMPI_Spikebuf*) emalloc(sizeof(NRNMPI_Spikebuf)); spbufin = (NRNMPI_Spikebuf*) emalloc(nrnmpi_numprocs * sizeof(NRNMPI_Spikebuf)); #endif } #endif } NetParEvent::NetParEvent() : ithread_(-1) , wx_(0.) , ws_(0.) {} void NetParEvent::send(double tt, NetCvode* nc, NrnThread* nt) { nc->event(tt + usable_mindelay_, this, nt); } void NetParEvent::deliver(double tt, NetCvode* nc, NrnThread* nt) { net_cvode_instance->deliver_events(tt, nt); nt->_stop_stepping = 1; nt->_t = tt; send(tt, nc, nt); } void NetParEvent::pr(const char* m, double tt, NetCvode*) { printf("%s NetParEvent %d t=%.15g tt-t=%g\n", m, ithread_, tt, tt - nrn_threads[ithread_]._t); } #if NRNMPI inline static void sppk(unsigned char* c, int gid) { for (int i = localgid_size_ - 1; i >= 0; --i) { c[i] = gid & 255; gid >>= 8; } } inline static int spupk(unsigned char* c) { int gid = *c++; for (int i = 1; i < localgid_size_; ++i) { gid <<= 8; gid += *c++; } return gid; } void nrn_outputevent(unsigned char localgid, double firetime) { if (!active_) { return; } std::lock_guard lock(mut); nout++; int i = idxout_; idxout_ += 2; if (idxout_ >= spfixout_capacity_) { spfixout_capacity_ *= 2; spikeout_fixed = (unsigned char*) erealloc(spikeout_fixed, spfixout_capacity_ * sizeof(unsigned char)); } spikeout_fixed[i++] = (unsigned char) 
((firetime - t_exchange_) * dt1_ + .5); spikeout_fixed[i] = localgid; // printf("%d idx=%d lgid=%d firetime=%g t_exchange_=%g [0]=%d [1]=%d\n", nrnmpi_myid, i, // (int)localgid, firetime, t_exchange_, (int)spikeout_fixed[i-1], (int)spikeout_fixed[i]); } void nrn2ncs_outputevent(int gid, double firetime) { if (!active_) { return; } std::lock_guard lock(mut); if (use_compress_) { nout++; int i = idxout_; idxout_ += 1 + localgid_size_; if (idxout_ >= spfixout_capacity_) { spfixout_capacity_ *= 2; spikeout_fixed = (unsigned char*) erealloc(spikeout_fixed, spfixout_capacity_ * sizeof(unsigned char)); } // printf("%d nrnncs_outputevent %d %.20g %.20g %d\n", nrnmpi_myid, gid, firetime, // t_exchange_, //(int)((unsigned char)((firetime - t_exchange_)*dt1_ + .5))); spikeout_fixed[i++] = (unsigned char) ((firetime - t_exchange_) * dt1_ + .5); // printf("%d idx=%d firetime=%g t_exchange_=%g spfixout=%d\n", nrnmpi_myid, i, firetime, // t_exchange_, (int)spikeout_fixed[i-1]); sppk(spikeout_fixed + i, gid); // printf("%d idx=%d gid=%d spupk=%d\n", nrnmpi_myid, i, gid, spupk(spikeout_fixed+i)); } else { #if nrn_spikebuf_size == 0 int i = nout++; if (i >= ocapacity_) { ocapacity_ *= 2; spikeout = (NRNMPI_Spike*) erealloc(spikeout, ocapacity_ * sizeof(NRNMPI_Spike)); } // printf("%d cell %d in slot %d fired at %g\n", nrnmpi_myid, gid, i, firetime); spikeout[i].gid = gid; spikeout[i].spiketime = firetime; #else int i = nout++; if (i >= nrn_spikebuf_size) { i -= nrn_spikebuf_size; if (i >= ocapacity_) { ocapacity_ *= 2; spikeout = (NRNMPI_Spike*) hoc_Erealloc(spikeout, ocapacity_ * sizeof(NRNMPI_Spike)); hoc_malchk(); } spikeout[i].gid = gid; spikeout[i].spiketime = firetime; } else { spbufout->gid[i] = gid; spbufout->spiketime[i] = firetime; } #endif } // printf("%d cell %d in slot %d fired at %g\n", nrnmpi_myid, gid, i, firetime); } #endif // NRNMPI static bool nrn_need_npe() { if (active_ || nrn_nthread > 1) { if (last_maxstep_arg_ == 0) { last_maxstep_arg_ = 100.; } return true; 
} else { if (!npe_.empty()) { npe_.clear(); npe_.shrink_to_fit(); } return false; } } #define TBUFSIZE 0 void nrn_spike_exchange_init() { // printf("nrn_spike_exchange_init\n"); if (!nrn_need_npe()) { return; } alloc_mpi_space(); usable_mindelay_ = mindelay_; #if NRN_MULTISEND if (use_multisend_ && n_multisend_interval == 2) { usable_mindelay_ *= 0.5; } #endif if (nrn_nthread > 1) { usable_mindelay_ -= dt; } if ((usable_mindelay_ < 1e-9) || (usable_mindelay_ < dt)) { if (nrnmpi_myid == 0) { hoc_execerror("usable mindelay is 0", "(or less than dt for fixed step method)"); } else { return; } } #if TBUFSIZE itbuf_ = 0; #endif #if NRN_MULTISEND if (use_multisend_) { nrn_multisend_init(); } #endif if (npe_.size() != static_cast(nrn_nthread)) { if (!npe_.empty()) { npe_.clear(); npe_.shrink_to_fit(); } npe_.resize(nrn_nthread); } for (int i = 0; i < nrn_nthread; ++i) { npe_[i].ithread_ = i; npe_[i].wx_ = 0.; npe_[i].ws_ = 0.; npe_[i].send(t, net_cvode_instance, nrn_threads + i); } #if NRNMPI if (corenrn_param.mpi_enable) { if (use_compress_) { idxout_ = 2; t_exchange_ = t; dt1_ = rev_dt; usable_mindelay_ = floor(mindelay_ * dt1_ + 1e-9) * dt; if (usable_mindelay_ * dt1_ >= 255.) { usable_mindelay_ = 255. 
/ dt1_; } assert(usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) <= 255.); } else { #if nrn_spikebuf_size > 0 if (spbufout) { spbufout->nspike = 0; } #endif } nout = 0; } #endif // NRNMPI // if (nrnmpi_myid == 0){printf("usable_mindelay_ = %g\n", usable_mindelay_);} } #if NRNMPI void nrn_spike_exchange(NrnThread* nt) { Instrumentor::phase p_spike_exchange("spike-exchange"); if (!active_) { return; } #if NRN_MULTISEND if (use_multisend_) { nrn_multisend_receive(nt); return; } #endif if (use_compress_) { nrn_spike_exchange_compressed(nt); return; } #if TBUFSIZE nrnmpi_barrier(); #endif #if nrn_spikebuf_size > 0 spbufout->nspike = nout; #endif double wt = nrn_wtime(); int n = nrnmpi_spike_exchange( nrnmpi_nin_, spikeout, icapacity, &spikein, ovfl, nout, spbufout, spbufin); wt_ = nrn_wtime() - wt; wt = nrn_wtime(); #if TBUFSIZE tbuf_[itbuf_++] = (unsigned long) nout; tbuf_[itbuf_++] = (unsigned long) n; #endif errno = 0; // if (n > 0) { // printf("%d nrn_spike_exchange sent %d received %d\n", nrnmpi_myid, nout, n); //} nout = 0; if (n == 0) { return; } #if nrn_spikebuf_size > 0 for (int i = 0; i < nrnmpi_numprocs; ++i) { int nn = spbufin[i].nspike; if (nn > nrn_spikebuf_size) { nn = nrn_spikebuf_size; } for (int j = 0; j < nn; ++j) { auto gid2in_it = gid2in.find(spbufin[i].gid[j]); if (gid2in_it != gid2in.end()) { InputPreSyn* ps = gid2in_it->second; ps->send(spbufin[i].spiketime[j], net_cvode_instance, nt); } } } n = ovfl; #endif // nrn_spikebuf_size > 0 for (int i = 0; i < n; ++i) { auto gid2in_it = gid2in.find(spikein[i].gid); if (gid2in_it != gid2in.end()) { InputPreSyn* ps = gid2in_it->second; ps->send(spikein[i].spiketime, net_cvode_instance, nt); } } nrn_multithread_job(interthread_enqueue); wt1_ = nrn_wtime() - wt; } void nrn_spike_exchange_compressed(NrnThread* nt) { if (!active_) { return; } #if TBUFSIZE nrnmpi_barrier(); #endif assert(nout < 0x10000); spikeout_fixed[1] = (unsigned char) (nout & 0xff); spikeout_fixed[0] = (unsigned char) (nout >> 8); 
double wt = nrn_wtime(); int n = nrnmpi_spike_exchange_compressed(localgid_size_, spfixin_ovfl_, ag_send_nspike, nrnmpi_nin_, ovfl_capacity, spikeout_fixed, ag_send_size, spikein_fixed, ovfl); wt_ = nrn_wtime() - wt; wt = nrn_wtime(); #if TBUFSIZE tbuf_[itbuf_++] = (unsigned long) nout; tbuf_[itbuf_++] = (unsigned long) n; #endif errno = 0; // if (n > 0) { // printf("%d nrn_spike_exchange sent %d received %d\n", nrnmpi_myid, nout, n); //} nout = 0; idxout_ = 2; if (n == 0) { t_exchange_ = nrn_threads->_t; return; } if (nrn_use_localgid_) { int idxov = 0; for (int i = 0; i < nrnmpi_numprocs; ++i) { int j, nnn; int nn = nrnmpi_nin_[i]; if (nn) { if (i == nrnmpi_myid) { // skip but may need to increment idxov. if (nn > ag_send_nspike) { idxov += (nn - ag_send_nspike) * (1 + localgid_size_); } continue; } std::map gps = localmaps[i]; if (nn > ag_send_nspike) { nnn = ag_send_nspike; } else { nnn = nn; } int idx = 2 + i * ag_send_size; for (j = 0; j < nnn; ++j) { // order is (firetime,gid) pairs. double firetime = spikein_fixed[idx++] * dt + t_exchange_; int lgid = (int) spikein_fixed[idx]; idx += localgid_size_; auto gid2in_it = gps.find(lgid); if (gid2in_it != gps.end()) { InputPreSyn* ps = gid2in_it->second; ps->send(firetime + 1e-10, net_cvode_instance, nt); } } for (; j < nn; ++j) { double firetime = spfixin_ovfl_[idxov++] * dt + t_exchange_; int lgid = (int) spfixin_ovfl_[idxov]; idxov += localgid_size_; auto gid2in_it = gps.find(lgid); if (gid2in_it != gps.end()) { InputPreSyn* ps = gid2in_it->second; ps->send(firetime + 1e-10, net_cvode_instance, nt); } } } } } else { for (int i = 0; i < nrnmpi_numprocs; ++i) { int nn = nrnmpi_nin_[i]; if (nn > ag_send_nspike) { nn = ag_send_nspike; } int idx = 2 + i * ag_send_size; for (int j = 0; j < nn; ++j) { // order is (firetime,gid) pairs. 
double firetime = spikein_fixed[idx++] * dt + t_exchange_; int gid = spupk(spikein_fixed + idx); idx += localgid_size_; auto gid2in_it = gid2in.find(gid); if (gid2in_it != gid2in.end()) { InputPreSyn* ps = gid2in_it->second; ps->send(firetime + 1e-10, net_cvode_instance, nt); } } } n = ovfl; int idx = 0; for (int i = 0; i < n; ++i) { double firetime = spfixin_ovfl_[idx++] * dt + t_exchange_; int gid = spupk(spfixin_ovfl_ + idx); idx += localgid_size_; auto gid2in_it = gid2in.find(gid); if (gid2in_it != gid2in.end()) { InputPreSyn* ps = gid2in_it->second; ps->send(firetime + 1e-10, net_cvode_instance, nt); } } } // In case of multiple threads some above ps->send events put // NetCon events into interthread buffers. Some of those may // need to be delivered early enough that the interthread buffers // need transfer to the thread event queues before the next dqueue_bin // while loop in deliver_net_events. So enqueue now... nrn_multithread_job(interthread_enqueue); t_exchange_ = nrn_threads->_t; wt1_ = nrn_wtime() - wt; } static void mk_localgid_rep() { // how many gids are there on this machine // and can they be compressed into one byte int ngid = 0; for (const auto& gid2out_elem: gid2out) { if (gid2out_elem.second->output_index_ >= 0) { ++ngid; } } int ngidmax = nrnmpi_int_allmax(ngid); if (ngidmax > 256) { // do not compress return; } localgid_size_ = sizeof(unsigned char); nrn_use_localgid_ = true; // allocate Allgather receive buffer (send is the nrnmpi_myid one) int* rbuf = new int[nrnmpi_numprocs * (ngidmax + 1)]; int* sbuf = new int[ngidmax + 1]; sbuf[0] = ngid; ++sbuf; ngid = 0; // define the local gid and fill with the gids on this machine for (const auto& gid2out_elem: gid2out) { if (gid2out_elem.second->output_index_ >= 0) { gid2out_elem.second->localgid_ = (unsigned char) ngid; sbuf[ngid] = gid2out_elem.second->output_index_; ++ngid; } } --sbuf; // exchange everything nrnmpi_int_allgather(sbuf, rbuf, ngidmax + 1); delete[] sbuf; errno = 0; // create the 
maps // there is a lot of potential for efficiency here. i.e. use of // perfect hash functions, or even simple Vectors. localmaps.clear(); localmaps.resize(nrnmpi_numprocs); // fill in the maps for (int i = 0; i < nrnmpi_numprocs; ++i) if (i != nrnmpi_myid) { sbuf = rbuf + i * (ngidmax + 1); ngid = *(sbuf++); for (int k = 0; k < ngid; ++k) { auto gid2in_it = gid2in.find(int(sbuf[k])); if (gid2in_it != gid2in.end()) { localmaps[i][k] = gid2in_it->second; } } } // cleanup delete[] rbuf; } #endif // NRNMPI // may stimulate a gid for a cell not owned by this cpu. This allows // us to run single cells or subnets and stimulate exactly according to // their input in a full parallel net simulation. // For some purposes, it may be useful to simulate a spike from a // cell that does exist and would normally send its own spike, eg. // recurrent stimulation. This can be useful in debugging where the // spike raster comes from another implementation and one wants to // get complete control of all input spikes without the confounding // effects of output spikes from the simulated cells. In this case // set the third arg to 1 and set the output cell thresholds very // high so that they do not themselves generate spikes. // Can only be called by thread 0 because of the ps->send. 
void nrn_fake_fire(int gid, double spiketime, int fake_out) { auto gid2in_it = gid2in.find(gid); if (gid2in_it != gid2in.end()) { InputPreSyn* psi = gid2in_it->second; assert(psi); // printf("nrn_fake_fire %d %g\n", gid, spiketime); psi->send(spiketime, net_cvode_instance, nrn_threads); } else if (fake_out) { std::map::iterator gid2out_it; gid2out_it = gid2out.find(gid); if (gid2out_it != gid2out.end()) { PreSyn* ps = gid2out_it->second; assert(ps); // printf("nrn_fake_fire fake_out %d %g\n", gid, spiketime); ps->send(spiketime, net_cvode_instance, nrn_threads); } } } static int timeout_ = 0; int nrn_set_timeout(int timeout) { int tt = timeout_; timeout_ = timeout; return tt; } void BBS_netpar_solve(double tstop) { double time = nrn_wtime(); #if NRNMPI if (corenrn_param.mpi_enable) { tstopunset; double mt = dt; double md = mindelay_ - 1e-10; if (md < mt) { if (nrnmpi_myid == 0) { hoc_execerror("mindelay is 0", "(or less than dt for fixed step method)"); } else { return; } } nrn_timeout(timeout_); nrn_multithread_job(interthread_enqueue); ncs2nrn_integrate(tstop * (1. + 1e-11)); nrn_spike_exchange(nrn_threads); nrn_timeout(0); if (!npe_.empty()) { npe_[0].wx_ = npe_[0].ws_ = 0.; }; // printf("%d netpar_solve exit t=%g tstop=%g mindelay_=%g\n",nrnmpi_myid, t, tstop, // mindelay_); nrnmpi_barrier(); } else #endif { ncs2nrn_integrate(tstop); } tstopunset; if (nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { printf("\nSolver Time : %g\n", nrn_wtime() - time); } } double set_mindelay(double maxdelay) { double mindelay = maxdelay; last_maxstep_arg_ = maxdelay; // if all==1 then minimum delay of all NetCon no matter the source. // except if src in same thread as NetCon int all = (nrn_nthread > 1); // minumum delay of all NetCon having an InputPreSyn source /** we have removed nt_ from PreSyn. 
Build local map of PreSyn * and NrnThread which will be used to find out if src in same thread as NetCon */ std::map presynmap; for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; for (int i = 0; i < nt.n_presyn; ++i) { presynmap[nt.presyns + i] = nrn_threads + ith; } } for (int ith = 0; ith < nrn_nthread; ++ith) { NrnThread& nt = nrn_threads[ith]; // if single thread or file transfer then definitely empty. std::vector& negsrcgid_tid = nrnthreads_netcon_negsrcgid_tid[ith]; size_t i_tid = 0; for (int i = 0; i < nt.n_netcon; ++i) { NetCon* nc = nt.netcons + i; bool chk = false; // ignore nc.delay_ int gid = nrnthreads_netcon_srcgid[ith][i]; int tid = ith; if (!negsrcgid_tid.empty() && gid < -1) { tid = negsrcgid_tid[i_tid++]; } PreSyn* ps; InputPreSyn* psi; netpar_tid_gid2ps(tid, gid, &ps, &psi); if (psi) { chk = true; } else if (all) { chk = true; // but ignore if src in same thread as NetCon if (ps && presynmap[ps] == &nt) { chk = false; } } if (chk && nc->delay_ < mindelay) { mindelay = nc->delay_; } } } #if NRNMPI if (corenrn_param.mpi_enable) { active_ = true; if (use_compress_) { if (mindelay / dt > 255) { mindelay = 255 * dt; } } // printf("%d netpar_mindelay local %g now calling nrnmpi_mindelay\n", nrnmpi_myid, // mindelay); // double st = time(); mindelay_ = nrnmpi_dbl_allmin(mindelay); // add_wait_time(st); // printf("%d local min=%g global min=%g\n", nrnmpi_myid, mindelay, mindelay_); errno = 0; } else #endif // NRNMPI { mindelay_ = mindelay; } return mindelay_; } /* 08-Nov-2010 The workhorse for spike exchange on up to 10K machines is MPI_Allgather but as the number of machines becomes far greater than the fanout per cell we have been exploring a class of exchange methods called multisend where the spikes only go to those machines that need them and there is overlap between communication and computation. The numer of variants of multisend has grown so that some method selection function is needed that makes sense. 
The situation that needs to be captured by xchng_meth is Allgather multisend implemented as MPI_ISend multisend DCMF (only for Blue Gene/P) multisend record_replay (only for Blue Gene/P with recordreplay_v1r4m2.patch) Note that Allgather allows spike compression and an allgather spike buffer with size chosen at setup time. All methods allow bin queueing. All the multisend methods should allow two phase multisend. Note that, in principle, MPI_ISend allows the source to send the index of the target PreSyn to avoid a hash table lookup (even with a two phase variant) RecordReplay should be best on the BG/P. The whole point is to make the spike transfer initiation as lowcost as possible since that is what causes most load imbalance. I.e. since 10K more spikes arrive than are sent, spikes received per processor per interval are much more statistically balanced than spikes sent per processor per interval. And presently DCMF multisend injects 10000 messages per spike into the network which is quite expensive. record replay avoids this overhead and the idea of two phase multisend distributes the injection. */ int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth) { #if NRNMPI if (corenrn_param.mpi_enable) { #if NRN_MULTISEND if (xchng_meth > 0) { use_multisend_ = 1; return 0; } #endif nrn_assert(xchng_meth == 0); if (nspike >= 0) { ag_send_nspike = 0; if (spikeout_fixed) { free(spikeout_fixed); spikeout_fixed = nullptr; } if (spikein_fixed) { free(spikein_fixed); spikein_fixed = nullptr; } if (spfixin_ovfl_) { free(spfixin_ovfl_); spfixin_ovfl_ = nullptr; } localmaps.clear(); } if (nspike == 0) { // turn off use_compress_ = false; nrn_use_localgid_ = false; } else if (nspike > 0) { // turn on use_compress_ = true; ag_send_nspike = nspike; nrn_use_localgid_ = false; if (gid_compress) { // we can only do this after everything is set up mk_localgid_rep(); if (!nrn_use_localgid_ && nrnmpi_myid == 0) { printf( "Notice: gid compression did not succeed. 
Probably more than 255 cells on " "one " "cpu.\n"); } } if (!nrn_use_localgid_) { localgid_size_ = sizeof(unsigned int); } ag_send_size = 2 + ag_send_nspike * (1 + localgid_size_); spfixout_capacity_ = ag_send_size + 50 * (1 + localgid_size_); spikeout_fixed = (unsigned char*) emalloc(spfixout_capacity_); spikein_fixed = (unsigned char*) emalloc(nrnmpi_numprocs * ag_send_size); ovfl_capacity = 100; spfixin_ovfl_ = (unsigned char*) emalloc(ovfl_capacity * (1 + localgid_size_)); } return ag_send_nspike; } else #endif { return 0; } } } // namespace coreneuron ================================================ FILE: coreneuron/network/netpar.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include "coreneuron/network/partrans.hpp" #include "coreneuron/sim/multicore.hpp" namespace coreneuron { extern void nrn_spike_exchange_init(void); extern void nrn_spike_exchange(NrnThread* nt); } // namespace coreneuron ================================================ FILE: coreneuron/network/partrans.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/network/partrans.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" // This is the computational code for src->target transfer (e.g. gap junction) // simulation. 
// The setup code is in partrans_setup.cpp namespace coreneuron { bool nrn_have_gaps; using namespace nrn_partrans; TransferThreadData* nrn_partrans::transfer_thread_data_; // MPI_Alltoallv buffer info double* nrn_partrans::insrc_buf_; // Receive buffer for gap voltages double* nrn_partrans::outsrc_buf_; // Send buffer for gap voltages int* nrn_partrans::insrccnt_; int* nrn_partrans::insrcdspl_; int* nrn_partrans::outsrccnt_; int* nrn_partrans::outsrcdspl_; void nrnmpi_v_transfer() { // copy source values to outsrc_buf_ and mpi transfer to insrc_buf // note that same source value (usually voltage) may get copied to // several locations in outsrc_buf // gather the source values. can be done in parallel for (int tid = 0; tid < nrn_nthread; ++tid) { auto& ttd = transfer_thread_data_[tid]; auto* nt = &nrn_threads[tid]; int n = int(ttd.outsrc_indices.size()); if (n == 0) { continue; } double* src_data = nt->_data; int* src_indices = ttd.src_indices.data(); // gather sources on gpu and copy to cpu, cpu scatters to outsrc_buf double* src_gather = ttd.src_gather.data(); size_t n_src_gather = ttd.src_gather.size(); nrn_pragma_acc(parallel loop present(src_indices [0:n_src_gather], src_data [0:nt->_ndata], src_gather [0:n_src_gather]) if (nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)) for (std::size_t i = 0; i < n_src_gather; ++i) { src_gather[i] = src_data[src_indices[i]]; } nrn_pragma_acc(update host(src_gather [0:n_src_gather]) if (nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target update from(src_gather [0:n_src_gather]) if (nt->compute_gpu)) } // copy gathered source values to outsrc_buf_ bool compute_gpu = false; for (int tid = 0; tid < nrn_nthread; ++tid) { if (nrn_threads[tid].compute_gpu) { compute_gpu = true; nrn_pragma_acc(wait(nrn_threads[tid].stream_id)) } TransferThreadData& ttd = transfer_thread_data_[tid]; size_t n_outsrc_indices = ttd.outsrc_indices.size(); int* outsrc_indices = 
ttd.outsrc_indices.data(); double* src_gather = ttd.src_gather.data(); int* src_gather_indices = ttd.gather2outsrc_indices.data(); for (size_t i = 0; i < n_outsrc_indices; ++i) { outsrc_buf_[outsrc_indices[i]] = src_gather[src_gather_indices[i]]; } } static_cast(compute_gpu); // transfer int n_insrc_buf = insrcdspl_[nrnmpi_numprocs]; #if NRNMPI if (corenrn_param.mpi_enable) { // otherwise insrc_buf_ == outsrc_buf_ nrnmpi_barrier(); nrnmpi_dbl_alltoallv( outsrc_buf_, outsrccnt_, outsrcdspl_, insrc_buf_, insrccnt_, insrcdspl_); } else #endif { // Use the multiprocess code even for one process to aid debugging // For nrnmpi_numprocs == 1, insrc_buf_ and outsrc_buf_ are same size. for (int i = 0; i < n_insrc_buf; ++i) { insrc_buf_[i] = outsrc_buf_[i]; } } // insrc_buf_ will get copied to targets via nrnthread_v_transfer nrn_pragma_acc(update device(insrc_buf_ [0:n_insrc_buf]) if (compute_gpu)) nrn_pragma_omp(target update to(insrc_buf_ [0:n_insrc_buf]) if (compute_gpu)) } void nrnthread_v_transfer(NrnThread* _nt) { // Copy insrc_buf_ values to the target locations. (An insrc_buf_ value // may be copied to several target locations. 
TransferThreadData& ttd = transfer_thread_data_[_nt->id]; size_t ntar = ttd.tar_indices.size(); int* tar_indices = ttd.tar_indices.data(); int* insrc_indices = ttd.insrc_indices.data(); double* tar_data = _nt->_data; // last element in the displacement vector gives total length #if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENACC) int n_insrc_buf = insrcdspl_[nrnmpi_numprocs]; int ndata = _nt->_ndata; #endif nrn_pragma_acc(parallel loop copyin(tar_indices [0:ntar]) present(insrc_indices [0:ntar], tar_data [0:ndata], insrc_buf_ [0:n_insrc_buf]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd map(to: tar_indices[0:ntar]) if(_nt->compute_gpu)) for (size_t i = 0; i < ntar; ++i) { tar_data[tar_indices[i]] = insrc_buf_[insrc_indices[i]]; } } void nrn_partrans::copy_gap_indices_to_device() { // Ensure index vectors, src_gather, and insrc_buf_ are on the gpu. if (insrcdspl_) { // TODO: we don't actually need to copy here, just allocate + associate // storage on the device cnrn_target_copyin(insrc_buf_, insrcdspl_[nrnmpi_numprocs]); } for (int tid = 0; tid < nrn_nthread; ++tid) { const NrnThread* nt = nrn_threads + tid; if (!nt->compute_gpu) { continue; } const TransferThreadData& ttd = transfer_thread_data_[tid]; if (!ttd.src_indices.empty()) { cnrn_target_copyin(ttd.src_indices.data(), ttd.src_indices.size()); // TODO: we don't actually need to copy here, just allocate + // associate storage on the device. 
cnrn_target_copyin(ttd.src_gather.data(), ttd.src_gather.size()); } if (ttd.insrc_indices.size()) { cnrn_target_copyin(ttd.insrc_indices.data(), ttd.insrc_indices.size()); } } } void nrn_partrans::delete_gap_indices_from_device() { if (insrcdspl_) { int n_insrc_buf = insrcdspl_[nrnmpi_numprocs]; cnrn_target_delete(insrc_buf_, n_insrc_buf); } for (int tid = 0; tid < nrn_nthread; ++tid) { const NrnThread* nt = nrn_threads + tid; if (!nt->compute_gpu) { continue; } TransferThreadData& ttd = transfer_thread_data_[tid]; if (!ttd.src_indices.empty()) { cnrn_target_delete(ttd.src_indices.data(), ttd.src_indices.size()); cnrn_target_delete(ttd.src_gather.data(), ttd.src_gather.size()); } if (!ttd.insrc_indices.empty()) { cnrn_target_delete(ttd.insrc_indices.data(), ttd.insrc_indices.size()); } } } } // namespace coreneuron ================================================ FILE: coreneuron/network/partrans.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/sim/multicore.hpp" #ifndef NRNLONGSGID #define NRNLONGSGID 0 #endif #if NRNLONGSGID using sgid_t = int64_t; #else using sgid_t = int; #endif namespace coreneuron { struct Memb_list; extern bool nrn_have_gaps; extern void nrnmpi_v_transfer(); extern void nrnthread_v_transfer(NrnThread*); namespace nrn_partrans { /** The basic problem is to copy sources to targets. * It may be the case that a source gets copied to several targets. * Sources and targets are a set of indices in NrnThread.data. * A copy may be intrathread, interthread, interprocess. * Copies happen every time step so efficiency is desirable. 
* SetupTransferInfo gives us the source and target (sid, type, index) triples * for a thread and all the global threads define what gets copied where. * Need to process that info into TransferThreadData for each thread and * the interprocessor mpi buffers insrc_buf_ and outsrc_buf_ transferred with * MPI_Alltoallv, hopefully with a more or less optimal ordering. * The compute strategy is: 1) Each thread copies its NrnThread.data source * items to outsrc_buf_. 2) MPI_Allgatherv transfers outsrc_buf_ to insrc_buf_. * 3) Each thread copies insrc_buf_ values to NrnThread.data target. * * Optimal ordering is probably beyond our reach but a few considerations * may be useful. The typical use is for gap junctions where only voltage * is transferred and all instances of the HalfGap Point_process receive a * voltage. Two situations are common. Voltage transfer is sparse and one * to one, i.e. many compartments do not have gap junctions, and those that do * have only one. The other situation is that all compartments have gap * junctions (e.g. syncytium of single compartment cells in the heart) and * the voltage needs to be transferred to all neighboring cells (e.g. 6-18 * cells can be neighbors to the central cell). So on the target side, it * might be good to copy to the target in target index order from the * insrc_buf_. And on the source side, it is certainly simple to scatter * to the outsrc_buf_ in NrnThread.data order. Note that one expects a wide * scatter to the outsrc_buf_ and also a wide scatter within the insrc_buf_. 
**/ /* * In partrans.cpp: nrnmpi_v_transfer * Copy NrnThead.data to outsrc_buf_ for all threads via * gpu: gather src_gather[i] = NrnThread._data[src_indices[i]]; * gpu to host src_gather * cpu: outsrc_buf_[outsrc_indices[i]] = src_gather[gather2outsrc_indices[i]]; * * MPI_Allgatherv outsrc_buf_ to insrc_buf_ * * host to gpu insrc_buf_ * * In partrans.cpp: nrnthread_v_transfer * insrc_buf_ to NrnThread._data via * NrnThread.data[tar_indices[i]] = insrc_buf_[insrc_indices[i]]; * where tar_indices depends on layout, type, etc. */ struct TransferThreadData { std::vector src_indices; // indices into NrnThread._data std::vector src_gather; // copy of NrnThread._data[src_indices] std::vector gather2outsrc_indices; // ix of src_gather that send into outsrc_indices std::vector outsrc_indices; // ix of outsrc_buf that receive src_gather values std::vector insrc_indices; // insrc_buf_ indices copied to ... std::vector tar_indices; // indices of NrnThread.data. }; extern TransferThreadData* transfer_thread_data_; /* array for threads */ } // namespace nrn_partrans } // namespace coreneuron // For direct transfer, // must be same as corresponding struct SetupTransferInfo in NEURON struct SetupTransferInfo { std::vector src_sid; std::vector src_type; std::vector src_index; std::vector tar_sid; std::vector tar_type; std::vector tar_index; }; namespace coreneuron { namespace nrn_partrans { extern SetupTransferInfo* setup_info_; /* array for threads exists only during setup*/ extern void gap_mpi_setup(int ngroup); extern void gap_data_indices_setup(NrnThread* nt); extern void copy_gap_indices_to_device(); extern void delete_gap_indices_from_device(); extern void gap_cleanup(); extern double* insrc_buf_; // Receive buffer for gap voltages extern double* outsrc_buf_; // Send buffer for gap voltages extern int *insrccnt_, *insrcdspl_, *outsrccnt_, *outsrcdspl_; } // namespace nrn_partrans } // namespace coreneuron ================================================ FILE: 
coreneuron/network/partrans_setup.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include #include #include "coreneuron/coreneuron.hpp" #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/network/partrans.hpp" #include "coreneuron/nrniv/nrniv_decl.h" namespace coreneuron { using namespace coreneuron::nrn_partrans; SetupTransferInfo* nrn_partrans::setup_info_; class SidInfo { public: std::vector tids_; std::vector indices_; }; } // namespace coreneuron #if NRNLONGSGID #define sgid_alltoallv nrnmpi_long_alltoallv #else #define sgid_alltoallv nrnmpi_int_alltoallv #endif #define HAVEWANT_t sgid_t #define HAVEWANT_alltoallv sgid_alltoallv #define HAVEWANT2Int std::map #include "coreneuron/network/have2want.h" namespace coreneuron { using namespace coreneuron::nrn_partrans; void nrn_partrans::gap_mpi_setup(int ngroup) { // printf("%d gap_mpi_setup ngroup=%d\n", nrnmpi_myid, ngroup); // count total_nsrc, total_ntar and allocate. // Possible either or both are 0 on this process. size_t total_nsrc = 0, total_ntar = 0; for (int tid = 0; tid < ngroup; ++tid) { auto& si = setup_info_[tid]; total_nsrc += si.src_sid.size(); total_ntar += si.tar_sid.size(); } // have and want arrays (add 1 to guarantee new ... is an array.) sgid_t* have = new sgid_t[total_nsrc + 1]; sgid_t* want = new sgid_t[total_ntar + 1]; // map from source sid to (tid, index), ie. NrnThread[tid]._data[index]. 
// and target sid to lists of (tid, index) for memb_list // also count the map sizes and fill have and want arrays std::map src2info; std::map tar2info; int src2info_size = 0, tar2info_size = 0; // number of unique sids for (int tid = 0; tid < ngroup; ++tid) { auto& si = setup_info_[tid]; // Sgid has unique source. for (size_t i = 0; i < si.src_sid.size(); ++i) { sgid_t sid = si.src_sid[i]; SidInfo sidinfo; sidinfo.tids_.push_back(tid); sidinfo.indices_.push_back(i); src2info[sid] = sidinfo; have[src2info_size] = sid; src2info_size++; } // Possibly many targets of same sid // Only want unique sids. From each, can obtain all its targets. for (size_t i = 0; i < si.tar_sid.size(); ++i) { sgid_t sid = si.tar_sid[i]; if (tar2info.find(sid) == tar2info.end()) { tar2info[sid] = SidInfo(); want[tar2info_size] = sid; tar2info_size++; } SidInfo& sidinfo = tar2info[sid]; sidinfo.tids_.push_back(tid); sidinfo.indices_.push_back(i); } } // 2) Call the have_to_want function. sgid_t* send_to_want; sgid_t* recv_from_have; have_to_want(have, src2info_size, want, tar2info_size, send_to_want, outsrccnt_, outsrcdspl_, recv_from_have, insrccnt_, insrcdspl_, default_rendezvous); int nhost = nrnmpi_numprocs; // sanity check. all the sgids we are asked to send, we actually have for (int i = 0; i < outsrcdspl_[nhost]; ++i) { sgid_t sgid = send_to_want[i]; assert(src2info.find(sgid) != src2info.end()); } // sanity check. all the sgids we receive, we actually need. 
for (int i = 0; i < insrcdspl_[nhost]; ++i) { sgid_t sgid = recv_from_have[i]; assert(tar2info.find(sgid) != tar2info.end()); } #if CORENRN_DEBUG printf("%d mpi outsrccnt_, outsrcdspl_, insrccnt, insrcdspl_\n", nrnmpi_myid); for (int i = 0; i < nrnmpi_numprocs; ++i) { printf("%d : %d %d %d %d\n", nrnmpi_myid, outsrccnt_[i], outsrcdspl_[i], insrccnt_[i], insrcdspl_[i]); } #endif // clean up a little delete[] have; delete[] want; insrc_buf_ = new double[insrcdspl_[nhost]]; outsrc_buf_ = new double[outsrcdspl_[nhost]]; // for i: src_gather[i] = NrnThread._data[src_indices[i]] // for j: outsrc_buf[outsrc_indices[j]] = src_gather[gather2outsrc_indices[j]] // src_indices point into NrnThread._data // Many outsrc_indices elements can point to the same src_gather element // but only if an sgid src datum is destined for multiple ranks. for (int i = 0; i < outsrcdspl_[nhost]; ++i) { sgid_t sgid = send_to_want[i]; SidInfo& sidinfo = src2info[sgid]; // only one item in the lists. int tid = sidinfo.tids_[0]; int setup_info_index = sidinfo.indices_[0]; auto& si = setup_info_[tid]; auto& ttd = transfer_thread_data_[tid]; // Note that src_index points into NrnThread.data, as it has already // been transformed using original src_type and src_index via // stdindex2ptr. // For copying into outsrc_buf from src_gather. This is from // NrnThread._data, fixup to "from src_gather" below. ttd.gather2outsrc_indices.push_back(si.src_index[setup_info_index]); ttd.outsrc_indices.push_back(i); } // Need to know src_gather index given NrnThread._data index // to compute gather2outsrc_indices. 
And the update outsrc_indices so that // for a given thread // for j: outsrc_buf[outsrc_indices[j]] = src_gather[gather2outsrc_indices[j]] for (int tid = 0; tid < ngroup; ++tid) { auto& ttd = transfer_thread_data_[tid]; std::map data2gather_indices; for (size_t i = 0; i < ttd.src_indices.size(); ++i) { data2gather_indices[ttd.src_indices[i]] = i; } for (size_t i = 0; i < ttd.outsrc_indices.size(); ++i) { ttd.gather2outsrc_indices[i] = data2gather_indices[ttd.gather2outsrc_indices[i]]; } } // Which insrc_indices point into which NrnThread.data // An sgid occurs at most once in the process recv_from_have. // But it might get distributed to more than one thread and to // several targets in a thread (specified by tar2info) // insrc_indices is parallel to tar_indices and has size ntar of the thread. // insrc_indices[i] is the index into insrc_buf // tar_indices[i] is the index into NrnThread.data // i.e. NrnThead._data[tar_indices[i]] = insrc_buf[insrc_indices[i]] for (int i = 0; i < insrcdspl_[nhost]; ++i) { sgid_t sgid = recv_from_have[i]; SidInfo& sidinfo = tar2info[sgid]; // there may be several items in the lists. for (size_t j = 0; j < sidinfo.tids_.size(); ++j) { int tid = sidinfo.tids_[j]; int index = sidinfo.indices_[j]; transfer_thread_data_[tid].insrc_indices[index] = i; } } #if CORENRN_DEBUG // things look ok so far? 
for (int tid = 0; tid < ngroup; ++tid) { SetupTransferInfo& si = setup_info_[tid]; nrn_partrans::TransferThreadData& ttd = transfer_thread_data_[tid]; for (size_t i = 0; i < si.src_sid.size(); ++i) { printf("%d %d src sid=%d v_index=%d %g\n", nrnmpi_myid, tid, si.src_sid[i], ttd.src_indices[i], nrn_threads[tid]._data[ttd.src_indices[i]]); } for (size_t i = 0; i < ttd.tar_indices.size(); ++i) { printf("%d %d src sid=i%zd tar_index=%d %g\n", nrnmpi_myid, tid, i, ttd.tar_indices[i], nrn_threads[tid]._data[ttd.tar_indices[i]]); } } #endif delete[] send_to_want; delete[] recv_from_have; } /** * For now, until conceptualization of the ordering is clear, * just replace src setup_info_ indices values with stdindex2ptr determined * index into NrnThread._data **/ void nrn_partrans::gap_data_indices_setup(NrnThread* n) { NrnThread& nt = *n; auto& ttd = transfer_thread_data_[nt.id]; auto& sti = setup_info_[nt.id]; ttd.src_gather.resize(sti.src_sid.size()); ttd.src_indices.resize(sti.src_sid.size()); ttd.insrc_indices.resize(sti.tar_sid.size()); ttd.tar_indices.resize(sti.tar_sid.size()); // For copying into src_gather from NrnThread._data for (size_t i = 0; i < sti.src_sid.size(); ++i) { double* d = stdindex2ptr(sti.src_type[i], sti.src_index[i], nt); sti.src_index[i] = int(d - nt._data); } // For copying into NrnThread._data from insrc_buf. for (size_t i = 0; i < sti.tar_sid.size(); ++i) { double* d = stdindex2ptr(sti.tar_type[i], sti.tar_index[i], nt); // todo : this should be revisited once nt._data will be broken // into mechanism specific data sti.tar_index[i] = int(d - nt._data); } // Here we could reorder sti.src_... 
according to NrnThread._data index // order // copy into TransferThreadData ttd.src_indices = sti.src_index; ttd.tar_indices = sti.tar_index; } void nrn_partrans::gap_cleanup() { if (transfer_thread_data_) { delete[] transfer_thread_data_; transfer_thread_data_ = nullptr; } if (insrc_buf_) { delete[] insrc_buf_; insrc_buf_ = nullptr; delete[] insrccnt_; insrccnt_ = nullptr; delete[] insrcdspl_; insrcdspl_ = nullptr; delete[] outsrc_buf_; outsrc_buf_ = nullptr; delete[] outsrccnt_; outsrccnt_ = nullptr; delete[] outsrcdspl_; outsrcdspl_ = nullptr; } } } // namespace coreneuron ================================================ FILE: coreneuron/network/tnode.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include // experiment with ordering strategies for Tree Nodes namespace coreneuron { class TNode; using VecTNode = std::vector; /** * \class TNode * \brief TNode is the tree node that represents the tree of the compartments */ class TNode { public: TNode(int ix); virtual ~TNode(); TNode* parent; VecTNode children; size_t mkhash(); /// Hash algorith that generates a hash based on the hash of the children and /// the number of compartments of the children size_t hash; /// Hash value generated by mkhash size_t treesize; /// Total number of compartments from the current node and below size_t nodevec_index; /// index in nodevec that is set in check() /// In cell permute 2 this is set as Breadth First traversal size_t treenode_order; /// For cell permute 1 (Interleaved): /// - This is the id given to the compartments based on a Breadth First /// access on the tree that is created in the original circuit /// - This is what makes the cell ordering interleaved /// For cell permute 2 (Constant Depth): 
/// VVVTN: Vector (groups of cells) of vector (levels of this group of /// cells. Maxsize = maxlevel) of vector of TNodes This changes 3 times /// during cell permute 2: /// 1. According to the sorting of the nodes of each level /// 2. According to the sorting of the parents' treenode_order of the /// previous ordering /// 3. According to children and parents data races. Parents and /// children of the tree are moved by question2() so that threads that /// exist on the same warp don't have data races when updating the /// children and parent variables, so that threads have to wait in /// atomic instructions. If there are any races then those are solved by /// atomic instructions. size_t level; /// level of of this compartment in the tree size_t cellindex; /// Cell ID that this compartment belongs to size_t groupindex; /// Initialized index / groupsize int nodeindex; }; size_t level_from_leaf(VecTNode&); size_t level_from_root(VecTNode&); /** * \brief Implementation of the advanced interleaving strategy (interleave_permute_type == 2) * * The main steps are the following: * 1. warp_balance function creates balanced groups of cells. * 2. The compartments/tree nodes populate the groups vector (VVVTN) based on their groudindex and * their level (see level_from_root). * 3. The analyze() & question2() functions (operating per group) make sure that each cell is still * a tree (treenode_order) and that the dependent nodes belong to separate warps. */ void group_order2(VecTNode&, size_t groupsize, size_t ncell); size_t dist2child(TNode* nd); /** * \brief Use of the LPT (Least Processing Time) algorithm to create balanced groups of cells. * * Competing objectives are to keep identical cells together and also balance warps. 
* * \param ncell number of cells * \param nodevec vector of compartments from all cells * \return number of warps */ size_t warp_balance(size_t ncell, VecTNode& nodevec); #define warpsize 32 } // namespace coreneuron ================================================ FILE: coreneuron/network/tqueue.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include #include #include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/tqueue.hpp" namespace coreneuron { // splay tree + bin queue limited to fixed step method // for event-sets or priority queues // this starts from the sptqueue.cpp file and adds a bin queue /* Derived from David Brower's c translation of pascal code by Douglas Jones. */ /* The original c code is included from this file but note that instead of struct _spblk, we are really using TQItem */ BinQ::BinQ() { nbin_ = 1000; bins_ = new TQItem*[nbin_]; for (int i = 0; i < nbin_; ++i) { bins_[i] = 0; } qpt_ = 0; tt_ = 0.; } BinQ::~BinQ() { for (int i = 0; i < nbin_; ++i) { assert(!bins_[i]); } delete[] bins_; vec_bins.clear(); } void BinQ::resize(int size) { // printf("BinQ::resize from %d to %d\n", nbin_, size); assert(size >= nbin_); TQItem** bins = new TQItem*[size]; for (int i = nbin_; i < size; ++i) { bins[i] = 0; } for (int i = 0, j = qpt_; i < nbin_; ++i, ++j) { if (j >= nbin_) { j = 0; } bins[i] = bins_[j]; for (auto q = bins[i]; q; q = q->left_) { q->cnt_ = i; } } delete[] bins_; bins_ = bins; nbin_ = size; qpt_ = 0; } void BinQ::enqueue(double td, TQItem* q) { int idt = (int) ((td - tt_) * rev_dt + 1.e-10); assert(idt >= 0); if (idt >= nbin_) { resize(idt + 1000); } // assert (idt < nbin_); idt += qpt_; if (idt >= nbin_) { idt -= nbin_; } // printf("enqueue: 
idt=%d qpt=%d nbin_=%d\n", idt, qpt_, nbin_); assert(idt < nbin_); q->cnt_ = idt; // only for iteration q->left_ = bins_[idt]; bins_[idt] = q; } TQItem* BinQ::dequeue() { TQItem* q = bins_[qpt_]; if (q) { bins_[qpt_] = q->left_; } return q; } TQItem* BinQ::first() { for (int i = 0; i < nbin_; ++i) { if (bins_[i]) { return bins_[i]; } } return 0; } TQItem* BinQ::next(TQItem* q) { if (q->left_) { return q->left_; } for (int i = q->cnt_ + 1; i < nbin_; ++i) { if (bins_[i]) { return bins_[i]; } } return 0; } void BinQ::remove(TQItem* q) { TQItem* q1 = bins_[q->cnt_]; if (q1 == q) { bins_[q->cnt_] = q->left_; return; } for (TQItem* q2 = q1->left_; q2; q1 = q2, q2 = q2->left_) { if (q2 == q) { q1->left_ = q->left_; return; } } } //#include "coreneuron/nrniv/sptree.h" /* * The following code implements the basic operations on * an event-set or priority-queue implemented using splay trees: * Hines changed to void spinit(SPTREE**) for use with TQueue. * SPTREE *spinit( compare ) Make a new tree * SPBLK *spenq( n, q ) Insert n in q after all equal keys. * SPBLK *spdeq( np ) Return first key under *np, removing it. * void splay( n, q ) n (already in q) becomes the root. * int n = sphead( q ) n is the head item in q (not removed). * spdelete( n, q ) n is removed from q. * * In the above, n points to an SPBLK type, while q points to an * SPTREE. * * The implementation used here is based on the implementation * which was used in the tests of splay trees reported in: * * An Empirical Comparison of Priority-Queue and Event-Set Implementations, * by Douglas W. Jones, Comm. ACM 29, 4 (Apr. 1986) 300-311. * * The changes made include the addition of the enqprior * operation and the addition of up-links to allow for the splay * operation. The basic splay tree algorithms were originally * presented in: * * Self Adjusting Binary Trees, * by D. D. Sleator and R. E. Tarjan, * Proc. ACM SIGACT Symposium on Theory * of Computing (Boston, Apr 1983) 235-245. 
*
 *  The enq and enqprior routines use variations on the
 *  top-down splay operation, while the splay routine is bottom-up.
 *  All are coded for speed.
 *
 *  Written by:
 *    Douglas W. Jones
 *
 *  Translated to C by:
 *    David Brower, daveb@rtech.uucp
 *
 * Thu Oct 6 12:11:33 PDT 1988 (daveb) Fixed spdeq, which was broken
 *      handling one-node trees.  I botched the pascal translation of
 *      a VAR parameter.
 */

/*----------------
 *
 * spinit() -- initialize an empty splay tree
 *
 *  resets the comparison counter (enqcmps) to zero and clears the root
 *  pointer; q itself must already point to storage owned by the caller
 *
 */
void spinit(SPTREE* q) {
    q->enqcmps = 0;
    q->root = nullptr;
}

/*----------------
 *
 *  spenq() -- insert item in a tree.
 *
 *  put n in q after all other nodes with the same key; when this is
 *  done, n will be the root of the splay tree representing q, all nodes
 *  in q with keys less than or equal to that of n will be in the
 *  left subtree, all with greater keys will be in the right subtree;
 *  the tree is split into these subtrees from the top down, with rotations
 *  performed along the way to shorten the left branch of the right subtree
 *  and the right branch of the left subtree
 *
 *  n is the node to insert; q is the tree to insert it into.
 *  returns n, which is the new root of q on exit.
 *  q->enqcmps is incremented once before every key comparison made here.
 */
SPBLK* spenq(SPBLK* n, SPTREE* q) {
    SPBLK* left;  /* the rightmost node in the left tree */
    SPBLK* right; /* the leftmost node in the right tree */
    SPBLK* next;  /* the root of the unsplit part */
    SPBLK* temp;  /* child of next currently being examined; also swap scratch */

    double key; /* copy of n->key, the value the tree is split on */

    n->uplink = nullptr;
    next = q->root;
    q->root = n;         /* n becomes the root regardless of what follows */
    if (next == nullptr) /* trivial enq */
    {
        n->leftlink = nullptr;
        n->rightlink = nullptr;
    } else /* difficult enq */
    {
        key = n->key;
        left = n;
        right = n;

        /* n's left and right children will hold the right and left
           splayed trees resulting from splitting on n->key;
           note that the children will be reversed! */

        q->enqcmps++;
        if (STRCMP(next->key, key) > 0)
            goto two;

    one: /* assert next->key <= key */

        do /* walk to the right in the left tree */
        {
            temp = next->rightlink;
            if (temp == nullptr) {
                left->rightlink = next;
                next->uplink = left;
                right->leftlink = nullptr;
                goto done; /* job done, entire tree split */
            }

            q->enqcmps++;
            if (STRCMP(temp->key, key) > 0) {
                left->rightlink = next;
                next->uplink = left;
                left = next;
                next = temp;
                goto two; /* change sides */
            }

            /* temp also belongs on the left: rotate it above next */
            next->rightlink = temp->leftlink;
            if (temp->leftlink != nullptr)
                temp->leftlink->uplink = next;
            left->rightlink = temp;
            temp->uplink = left;
            temp->leftlink = next;
            next->uplink = temp;
            left = temp;
            next = temp->rightlink;
            if (next == nullptr) {
                right->leftlink = nullptr;
                goto done; /* job done, entire tree split */
            }

            q->enqcmps++;

        } while (STRCMP(next->key, key) <= 0); /* change sides */

    two: /* assert next->key > key */

        do /* walk to the left in the right tree */
        {
            temp = next->leftlink;
            if (temp == nullptr) {
                right->leftlink = next;
                next->uplink = right;
                left->rightlink = nullptr;
                goto done; /* job done, entire tree split */
            }

            q->enqcmps++;
            if (STRCMP(temp->key, key) <= 0) {
                right->leftlink = next;
                next->uplink = right;
                right = next;
                next = temp;
                goto one; /* change sides */
            }

            /* temp also belongs on the right: rotate it above next */
            next->leftlink = temp->rightlink;
            if (temp->rightlink != nullptr)
                temp->rightlink->uplink = next;
            right->leftlink = temp;
            temp->uplink = right;
            temp->rightlink = next;
            next->uplink = temp;
            right = temp;
            next = temp->leftlink;
            if (next == nullptr) {
                left->rightlink = nullptr;
                goto done; /* job done, entire tree split */
            }

            q->enqcmps++;

        } while (STRCMP(next->key, key) > 0); /* change sides */

        goto one;

    done: /* split is done, branches of n need reversal */

        temp = n->leftlink;
        n->leftlink = n->rightlink;
        n->rightlink = temp;
    }

    return (n);

} /* spenq */

/*----------------
 *
 *  spdeq() -- return and remove head node from a subtree.
* * remove and return the head node from the node set; this deletes * (and returns) the leftmost node from q, replacing it with its right * subtree (if there is one); on the way to the leftmost node, rotations * are performed to shorten the left branch of the tree */ SPBLK* spdeq(SPBLK** np) /* pointer to a node pointer */ { SPBLK* deq; /* one to return */ SPBLK* next; /* the next thing to deal with */ SPBLK* left; /* the left child of next */ SPBLK* farleft; /* the left child of left */ SPBLK* farfarleft; /* the left child of farleft */ if (np == nullptr || *np == nullptr) { deq = nullptr; } else { next = *np; left = next->leftlink; if (left == nullptr) { deq = next; *np = next->rightlink; if (*np != nullptr) (*np)->uplink = nullptr; } else for (;;) /* left is not null */ { /* next is not it, left is not nullptr, might be it */ farleft = left->leftlink; if (farleft == nullptr) { deq = left; next->leftlink = left->rightlink; if (left->rightlink != nullptr) left->rightlink->uplink = next; break; } /* next, left are not it, farleft is not nullptr, might be it */ farfarleft = farleft->leftlink; if (farfarleft == nullptr) { deq = farleft; left->leftlink = farleft->rightlink; if (farleft->rightlink != nullptr) farleft->rightlink->uplink = left; break; } /* next, left, farleft are not it, rotate */ next->leftlink = farleft; farleft->uplink = next; left->leftlink = farleft->rightlink; if (farleft->rightlink != nullptr) farleft->rightlink->uplink = left; farleft->rightlink = left; left->uplink = farleft; next = farleft; left = farfarleft; } } return (deq); } /* spdeq */ /*---------------- * * splay() -- reorganize the tree. 
*
 *  the tree is reorganized so that n is the root of the
 *  splay tree representing q; results are unpredictable if n is not
 *  in q to start with; q is split from n up to the old root, with all
 *  nodes to the left of n ending up in the left subtree, and all nodes
 *  to the right of n ending up in the right subtree; the left branch of
 *  the right subtree and the right branch of the left subtree are
 *  shortened in the process
 *
 *  this code assumes that n is not nullptr and is in q; it can sometimes
 *  detect n not in q and complain
 *
 *  n is the node to bring to the root; q is the tree containing it.
 *  on return q->root == n and n->uplink is nullptr.
 */
void splay(SPBLK* n, SPTREE* q) {
    SPBLK* up;     /* points to the node being dealt with */
    SPBLK* prev;   /* a descendent of up, already dealt with */
    SPBLK* upup;   /* the parent of up */
    SPBLK* upupup; /* the grandparent of up */
    SPBLK* left;   /* the top of left subtree being built */
    SPBLK* right;  /* the top of right subtree being built */

    /* start from n's own children and climb towards the root */
    left = n->leftlink;
    right = n->rightlink;
    prev = n;
    up = prev->uplink;

    while (up != nullptr) {
        /* walk up the tree towards the root, splaying all to the left of
           n into the left subtree, all to right into the right subtree */

        upup = up->uplink;
        if (up->leftlink == prev) /* up is to the right of n */
        {
            if (upup != nullptr && upup->leftlink == up) /* rotate */
            {
                upupup = upup->uplink;
                upup->leftlink = up->rightlink;
                if (upup->leftlink != nullptr)
                    upup->leftlink->uplink = upup;
                up->rightlink = upup;
                upup->uplink = up;
                /* re-attach the rotated pair to whatever was above it */
                if (upupup == nullptr)
                    q->root = up;
                else if (upupup->leftlink == upup)
                    upupup->leftlink = up;
                else
                    upupup->rightlink = up;
                up->uplink = upupup;
                upup = upupup;
            }
            up->leftlink = right;
            if (right != nullptr)
                right->uplink = up;
            right = up;

        } else /* up is to the left of n */
        {
            if (upup != nullptr && upup->rightlink == up) /* rotate */
            {
                upupup = upup->uplink;
                upup->rightlink = up->leftlink;
                if (upup->rightlink != nullptr)
                    upup->rightlink->uplink = upup;
                up->leftlink = upup;
                upup->uplink = up;
                /* re-attach the rotated pair to whatever was above it */
                if (upupup == nullptr)
                    q->root = up;
                else if (upupup->rightlink == upup)
                    upupup->rightlink = up;
                else
                    upupup->leftlink = up;
                up->uplink = upupup;
                upup = upupup;
            }
            up->rightlink = left;
            if (left != nullptr)
                left->uplink = up;
            left = up;
        }
        prev = up;
        up = upup;
    }

#ifdef DEBUG
    /* the climb must have ended at the old root; otherwise n was not in q */
    if (q->root != prev) {
        /*	fprintf(stderr, " *** bug in splay: n not in q *** " ); */
        abort();
    }
#endif

    /* hang the two assembled subtrees under n and make n the root */
    n->leftlink = left;
    n->rightlink = right;
    if (left != nullptr)
        left->uplink = n;
    if (right != nullptr)
        right->uplink = n;
    q->root = n;
    n->uplink = nullptr;

} /* splay */

/*----------------
 *
 * sphead() --  return the "lowest" element in the tree.
 *
 *      returns a reference to the head event in the event-set q,
 *      represented as a splay tree; q->root ends up pointing to the head
 *      event, and the old left branch of q is shortened, as if q had
 *      been splayed about the head element; this is done by dequeueing
 *      the head and then making the resulting queue the right son of
 *      the head returned by spdeq; an alternative is provided which
 *      avoids splaying but just searches for and returns a pointer to
 *      the bottom of the left branch
 *
 *      returns nullptr (and leaves q->root null) when q is empty.
 */
SPBLK* sphead(SPTREE* q) {
    SPBLK* x;

    /* splay version, good amortized bound */
    x = spdeq(&q->root);
    if (x != nullptr) {
        /* re-insert the dequeued head as the root, old tree on its right */
        x->rightlink = q->root;
        x->leftlink = nullptr;
        x->uplink = nullptr;
        if (q->root != nullptr)
            q->root->uplink = x;
    }
    q->root = x;

    /* alternative version, bad amortized bound,
       but faster on the average
       (NOTE: no code for this alternative is present in this function;
       only the splay version above is implemented) */

    return (x);

} /* sphead */

/*----------------
 *
 * spdelete() -- Delete node from a tree.
* * n is deleted from q; the resulting splay tree has been splayed * around its new root, which is the successor of n * */ void spdelete(SPBLK* n, SPTREE* q) { SPBLK* x; splay(n, q); x = spdeq(&q->root->rightlink); if (x == nullptr) /* empty right subtree */ { q->root = q->root->leftlink; if (q->root) q->root->uplink = nullptr; } else /* non-empty right subtree */ { x->uplink = nullptr; x->leftlink = q->root->leftlink; x->rightlink = q->root->rightlink; if (x->leftlink != nullptr) x->leftlink->uplink = x; if (x->rightlink != nullptr) x->rightlink->uplink = x; q->root = x; } } /* spdelete */ } // namespace coreneuron ================================================ FILE: coreneuron/network/tqueue.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once /* ** SPTREE: The following type declarations provide the binary tree ** representation of event-sets or priority queues needed by splay trees ** ** assumes that data and datb will be provided by the application ** to hold all application specific information ** ** assumes that key will be provided by the application, comparable ** with the compare function applied to the addresses of two keys. */ // bin queue for the fixed step method for NetCons and PreSyns. Splay tree // for others. // fifo for the NetCons and PreSyns with same delay. Splay tree for // others (especially SelfEvents). // note that most methods below assume a TQItem is in the splay tree // For the bin part, only insert_fifo, and remove make sense, // The bin part assumes a fixed step method. 
#include #include #include #include #include #include namespace coreneuron { #define STRCMP(a, b) (a - b) class TQItem; #define SPBLK TQItem #define leftlink left_ #define rightlink right_ #define uplink parent_ #define cnt cnt_ #define key t_ struct SPTREE { SPBLK* root; /* root node */ /* Statistics, not strictly necessary, but handy for tuning */ int enqcmps; /* compares in spenq */ }; #define spinit sptq_spinit #define spenq sptq_spenq #define spdeq sptq_spdeq #define splay sptq_splay #define sphead sptq_sphead #define spdelete sptq_spdelete extern void spinit(SPTREE*); /* init tree */ extern SPBLK* spenq(SPBLK*, SPTREE*); /* insert item into the tree */ extern SPBLK* spdeq(SPBLK**); /* return and remove lowest item in subtree */ extern void splay(SPBLK*, SPTREE*); /* reorganize tree */ extern SPBLK* sphead(SPTREE*); /* return first node in tree */ extern void spdelete(SPBLK*, SPTREE*); /* delete node from tree */ struct DiscreteEvent; class TQItem { public: DiscreteEvent* data_ = nullptr; double t_ = 0; TQItem* left_ = nullptr; TQItem* right_ = nullptr; TQItem* parent_ = nullptr; int cnt_ = 0; // reused: -1 means it is in the splay tree, >=0 gives bin }; using TQPair = std::pair; struct less_time { bool operator()(const TQPair& x, const TQPair& y) const { return x.first > y.first; } }; // helper class for the TQueue (SplayTBinQueue). 
/// Helper class for the TQueue: a ring of bins indexed by qpt_, where tt_ is
/// the time at the beginning of the current bin's interval.
/// Only shift(), top() and tbin() are defined here; the remaining members are
/// declared only and their behaviour is not visible in this header.
class BinQ {
  public:
    BinQ();
    ~BinQ();
    void enqueue(double tt, TQItem*);
    /// Advance to the next bin and record `tt` as the new bin start time.
    /// The current bin must already be empty (checked by the assert).
    void shift(double tt) {
        assert(!bins_[qpt_]);
        tt_ = tt;
        // wrap around to the first bin after the last one
        if (++qpt_ >= nbin_) {
            qpt_ = 0;
        }
    }
    /// Entry stored in the current bin (null when the bin is empty).
    TQItem* top() {
        return bins_[qpt_];
    }
    TQItem* dequeue();
    /// Time at the beginning of the current bin interval.
    double tbin() {
        return tt_;
    }
    // for iteration
    TQItem* first();
    TQItem* next(TQItem*);
    void remove(TQItem*);
    void resize(int);

  private:
    double tt_;  // time at beginning of qpt_ interval
    int nbin_, qpt_;  // number of bins, index of the current bin
    TQItem** bins_;
    // NOTE(review): the template arguments of this member look stripped in
    // this listing; confirm the element type against the original header.
    std::vector> vec_bins;
};
*/ #ifndef tqueue_ipp_ #define tqueue_ipp_ #include #include #include #include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/tqueue.hpp" namespace coreneuron { // splay tree + bin queue limited to fixed step method // for event-sets or priority queues // this starts from the sptqueue.cpp file and adds a bin queue /* Derived from David Brower's c translation of pascal code by Douglas Jones. */ /* The original c code is included from this file but note that instead of struct _spblk, we are really using TQItem */ template TQueue::TQueue() { nshift_ = 0; sptree_ = new SPTREE; spinit(sptree_); binq_ = new BinQ; least_ = 0; } template TQueue::~TQueue() { SPBLK *q, *q2; /// Clear the binq for (q = binq_->first(); q; q = q2) { q2 = binq_->next(q); binq_->remove(q); delete q; } delete binq_; if (least_) { delete least_; least_ = nullptr; } /// Clear the splay tree while ((q = spdeq(&sptree_->root)) != nullptr) { delete q; } delete sptree_; /// Clear the priority queue while (pq_que_.size()) { delete pq_que_.top().second; pq_que_.pop(); } } template TQItem* TQueue::enqueue_bin(double td, DiscreteEvent* d) { TQItem* i = new TQItem; i->data_ = d; i->t_ = td; binq_->enqueue(td, i); return i; } /// Splay tree priority queue implementation template <> inline void TQueue::move_least_nolock(double tnew) { TQItem* b = least(); if (b) { b->t_ = tnew; TQItem* nl; nl = sphead(sptree_); if (nl && (tnew > nl->t_)) { least_ = spdeq(&sptree_->root); spenq(b, sptree_); } } } /// STL priority queue implementation template <> inline void TQueue::move_least_nolock(double tnew) { TQItem* b = least(); if (b) { b->t_ = tnew; TQItem* nl; nl = pq_que_.top().second; if (nl && (tnew > nl->t_)) { least_ = nl; pq_que_.pop(); pq_que_.push(make_TQPair(b)); } } } /// Splay tree priority queue implementation template <> inline void TQueue::move(TQItem* i, double tnew) { if (i == least_) { move_least_nolock(tnew); } else if (tnew < least_->t_) { spdelete(i, sptree_); i->t_ = tnew; 
spenq(least_, sptree_); least_ = i; } else { spdelete(i, sptree_); i->t_ = tnew; spenq(i, sptree_); } } /// STL priority queue implementation template <> inline void TQueue::move(TQItem* i, double tnew) { if (i == least_) { move_least_nolock(tnew); } else if (tnew < least_->t_) { TQItem* qmove = new TQItem; qmove->data_ = i->data_; qmove->t_ = tnew; qmove->cnt_ = i->cnt_; i->t_ = -1.; pq_que_.push(make_TQPair(least_)); least_ = qmove; } else { TQItem* qmove = new TQItem; qmove->data_ = i->data_; qmove->t_ = tnew; qmove->cnt_ = i->cnt_; i->t_ = -1.; pq_que_.push(make_TQPair(qmove)); } } /// Splay tree priority queue implementation template <> inline TQItem* TQueue::insert(double tt, DiscreteEvent* d) { TQItem* i = new TQItem; i->data_ = d; i->t_ = tt; i->cnt_ = -1; if (tt < least_t_nolock()) { if (least_) { /// Probably storing both time and event which has the time is redundant, but the event /// is then returned /// to the upper level call stack function. If we were to eliminate i->t_ and i->cnt_ /// fields, /// we need to make sure we are not braking anything. spenq(least_, sptree_); } least_ = i; } else { spenq(i, sptree_); } return i; } /// STL priority queue implementation template <> inline TQItem* TQueue::insert(double tt, DiscreteEvent* d) { TQItem* i = new TQItem; i->data_ = d; i->t_ = tt; i->cnt_ = -1; if (tt < least_t_nolock()) { if (least_) { /// Probably storing both time and event which has the time is redundant, but the event /// is then returned /// to the upper level call stack function. If we were to eliminate i->t_ and i->cnt_ /// fields, /// we need to make sure we are not braking anything. 
pq_que_.push(make_TQPair(least_)); } least_ = i; } else { pq_que_.push(make_TQPair(i)); } return i; } /// Splay tree priority queue implementation template <> inline void TQueue::remove(TQItem* q) { if (q) { if (q == least_) { if (sptree_->root) { least_ = spdeq(&sptree_->root); } else { least_ = nullptr; } } else { spdelete(q, sptree_); } delete q; } } /// STL priority queue implementation template <> inline void TQueue::remove(TQItem* q) { if (q) { if (q == least_) { if (pq_que_.size()) { least_ = pq_que_.top().second; pq_que_.pop(); } else { least_ = nullptr; } } else { q->t_ = -1.; } } } /// Splay tree priority queue implementation template <> inline TQItem* TQueue::atomic_dq(double tt) { TQItem* q = nullptr; if (least_ && least_->t_ <= tt) { q = least_; if (sptree_->root) { least_ = spdeq(&sptree_->root); } else { least_ = nullptr; } } return q; } /// STL priority queue implementation template <> inline TQItem* TQueue::atomic_dq(double tt) { TQItem* q = nullptr; if (least_ && least_->t_ <= tt) { q = least_; // int qsize = pq_que_.size(); // printf("map size: %d\n", msize); /// This while loop is to delete events whose times have been moved with the ::move /// function, /// but in fact events were left in the queue since the only function available is pop while (pq_que_.size() && pq_que_.top().second->t_ < 0.) { delete pq_que_.top().second; pq_que_.pop(); } if (pq_que_.size()) { least_ = pq_que_.top().second; pq_que_.pop(); } else { least_ = nullptr; } } return q; } } // namespace coreneuron #endif ================================================ FILE: coreneuron/nrnconf.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include "coreneuron/config/version_macros.hpp" #include "coreneuron/utils/offload.hpp" #include #include #include #include #include namespace coreneuron { #define NRNBBCORE 1 using Datum = int; using Pfri = int (*)(); using Symbol = char; #define VEC_A(i) (_nt->_actual_a[(i)]) #define VEC_B(i) (_nt->_actual_b[(i)]) #define VEC_D(i) (_nt->_actual_d[(i)]) #define VEC_RHS(i) (_nt->_actual_rhs[(i)]) #define VEC_V(i) (_nt->_actual_v[(i)]) #define VEC_AREA(i) (_nt->_actual_area[(i)]) #define VECTORIZE 1 extern double celsius; extern double pi; extern int secondorder; extern double t, dt; extern int rev_dt; extern bool stoprun; extern const char* bbcore_write_version; #define tstopbit (1 << 15) #define tstopset stoprun |= tstopbit #define tstopunset stoprun &= (~tstopbit) extern void* nrn_cacheline_alloc(void** memptr, size_t size); extern void* emalloc_align(size_t size, size_t alignment); extern void* ecalloc_align(size_t n, size_t size, size_t alignment); extern void check_bbcore_write_version(const char*); } // namespace coreneuron ================================================ FILE: coreneuron/nrniv/nrniv_decl.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include #include #include "coreneuron/network/netcon.hpp" namespace coreneuron { /// Mechanism type to be used from stdindex2ptr and nrn_dblpntr2nrncore (in Neuron) /// Values of the mechanism types should be negative numbers to avoid any conflict with /// mechanism types of Memb_list(>0) or time(0) passed from Neuron enum mech_type { voltage = -1, i_membrane_ = -2 }; extern bool cvode_active_; /// Vector of maps for negative presyns extern std::vector> neg_gid2out; /// Maps for ouput and input presyns extern std::map gid2out; extern std::map gid2in; /// InputPreSyn.nc_index_ to + InputPreSyn.nc_cnt_ give the NetCon* extern std::vector netcon_in_presyn_order_; /// Only for setup vector of netcon source gids and mindelay determination extern std::vector nrnthreads_netcon_srcgid; /// Companion to nrnthreads_netcon_srcgid when src gid is negative to allow /// determination of the NrnThread of the source PreSyn. extern std::vector> nrnthreads_netcon_negsrcgid_tid; extern void mk_mech(const char* path); extern void set_globals(const char* path, bool cli_global_seed, int cli_global_seed_value); extern void mk_netcvode(void); extern void nrn_p_construct(void); extern double* stdindex2ptr(int mtype, int index, NrnThread&); extern void delete_trajectory_requests(NrnThread&); extern void nrn_cleanup(); extern void nrn_cleanup_ion_map(); extern void BBS_netpar_solve(double); extern void nrn_mkPatternStim(const char* filename, double tstop); extern int nrn_extra_thread0_vdata; extern void nrn_set_extra_thread0_vdata(void); extern Point_process* nrn_artcell_instantiate(const char* mechname); extern int nrnmpi_spike_compress(int nspike, bool gidcompress, int xchng); extern bool nrn_use_bin_queue_; extern void nrn_outputevent(unsigned char, double); extern void ncs2nrn_integrate(double tstop); extern void handle_forward_skip(double forwardskip, int prcellgid); extern int nrn_set_timeout(int); extern void nrn_fake_fire(int gid, double spiketime, int fake_out); 
extern void netpar_tid_gid2ps(int tid, int gid, PreSyn** ps, InputPreSyn** psi); extern double set_mindelay(double maxdelay); extern int nrn_soa_padded_size(int cnt, int layout); extern int interleave_permute_type; extern int cellorder_nwarp; // Mechanism pdata index values into _actual_v and _actual_area data need to be updated. enum Layout { SoA = 0, AoS = 1 }; } // namespace coreneuron ================================================ FILE: coreneuron/nrnoc/md1redef.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #define v _v #define area _area #define thisnode _thisnode #define GC _GC #define EC _EC #define extnode _extnode #define xain _xain #define xbout _xbout #define i _i #define sec _sec #undef Memb_list #undef nodelist #undef nodeindices #undef data #undef pdata #undef prop #undef nodecount #undef pval #undef id #undef weights #undef weight_index_ #define nodelist _nodelist #define nodeindices _nodeindices #define data _data #define pdata _pdata #define prop _prop #define nodecount _nodecount #define pval _pval #define id _id #define weights _weights #define weight_index_ _weight_index ================================================ FILE: coreneuron/nrnoc/md2redef.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #undef v #undef area #undef thisnode #undef GC #undef EC #undef extnode #undef xain #undef xbout #undef i #undef sec #undef NrnThread #undef Memb_list #undef nodelist #undef nodeindices #undef data #undef pdata #undef prop #undef nodecount #undef pval #undef weights #undef weight_index_ #undef id ================================================ FILE: coreneuron/permute/balance.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ // use LPT algorithm to balance cells so all warps have similar number // of compartments. // NB: Ideally we'd balance so that warps have similar ncycle. But we do not // know how to predict warp quality without an apriori set of cells to // fill the warp. For large numbers of cells in a warp, // it is a justifiable speculation to presume that there will be very // few holes in warp filling. I.e., ncycle = ncompart/warpsize #include #include "coreneuron/nrnconf.h" #include "coreneuron/network/tnode.hpp" #include "coreneuron/utils/lpt.hpp" namespace coreneuron { int cellorder_nwarp = 0; // 0 means do not balance // ordering by warp, then old order bool warpcmp(const TNode* a, const TNode* b) { if (a->groupindex < b->groupindex) { return true; } else if (a->groupindex == b->groupindex && a->nodevec_index < b->nodevec_index) { return true; } return false; } // order the ncell nodevec roots for balance and return a displacement // vector specifying the contiguous roots for a warp. // The return vector should be freed by the caller. // On entry, nodevec is ordered so that each cell type is together and // largest cells first. 
On exit, nodevec is ordered so that warp i // should contain roots nodevec[displ[i]:displ[i+1]] size_t warp_balance(size_t ncell, VecTNode& nodevec) { if (ncell == 0) { return 0; } if (cellorder_nwarp == 0) { return 0; } size_t nwarp = size_t(cellorder_nwarp); // cannot be more warps than cells nwarp = std::min(ncell, nwarp); // cellsize vector and location of types. std::vector cellsize(ncell); std::vector typedispl; size_t total_compart = 0; typedispl.push_back(0); // types are already in order for (size_t i = 0; i < ncell; ++i) { cellsize[i] = nodevec[i]->treesize; total_compart += cellsize[i]; if (i == 0 || nodevec[i]->hash != nodevec[i - 1]->hash) { typedispl.push_back(typedispl.back() + 1); } else { typedispl.back() += 1; } } size_t ideal_compart_per_warp = total_compart / nwarp; size_t min_cells_per_warp = 0; for (size_t i = 0, sz = 0; sz < ideal_compart_per_warp; ++i) { ++min_cells_per_warp; sz += cellsize[i]; } // balance when order is unrestricted (identical cells not together) // i.e. pieces are cellsize double best_balance = 0.0; auto inwarp = lpt(nwarp, cellsize, &best_balance); printf("best_balance=%g ncell=%ld ntype=%ld nwarp=%ld\n", best_balance, ncell, typedispl.size() - 1, nwarp); // order the roots for balance for (size_t i = 0; i < ncell; ++i) { TNode* nd = nodevec[i]; nd->groupindex = inwarp[i]; } std::sort(nodevec.begin(), nodevec.begin() + ncell, warpcmp); for (size_t i = 0; i < nodevec.size(); ++i) { TNode* nd = nodevec[i]; for (size_t j = 0; j < nd->children.size(); ++j) { nd->children[j]->groupindex = nd->groupindex; } nd->nodevec_index = i; } return nwarp; } } // namespace coreneuron ================================================ FILE: coreneuron/permute/cellorder.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/network/tnode.hpp" #include "coreneuron/utils/lpt.hpp" #include "coreneuron/utils/memory.h" #include "coreneuron/utils/offload.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/permute/node_permute.h" // for print_quality #ifdef _OPENACC #include #endif #include namespace coreneuron { int interleave_permute_type; InterleaveInfo* interleave_info; // nrn_nthread array void InterleaveInfo::swap(InterleaveInfo& info) { std::swap(nwarp, info.nwarp); std::swap(nstride, info.nstride); std::swap(stridedispl, info.stridedispl); std::swap(stride, info.stride); std::swap(firstnode, info.firstnode); std::swap(lastnode, info.lastnode); std::swap(cellsize, info.cellsize); std::swap(nnode, info.nnode); std::swap(ncycle, info.ncycle); std::swap(idle, info.idle); std::swap(cache_access, info.cache_access); std::swap(child_race, info.child_race); } InterleaveInfo::InterleaveInfo(const InterleaveInfo& info) { nwarp = info.nwarp; nstride = info.nstride; copy_align_array(stridedispl, info.stridedispl, nwarp + 1); copy_align_array(stride, info.stride, nstride); copy_align_array(firstnode, info.firstnode, nwarp + 1); copy_align_array(lastnode, info.lastnode, nwarp + 1); copy_align_array(cellsize, info.cellsize, nwarp); copy_array(nnode, info.nnode, nwarp); copy_array(ncycle, info.ncycle, nwarp); copy_array(idle, info.idle, nwarp); copy_array(cache_access, info.cache_access, nwarp); copy_array(child_race, info.child_race, nwarp); } InterleaveInfo& InterleaveInfo::operator=(const InterleaveInfo& info) { // self assignment if (this == &info) return *this; InterleaveInfo temp(info); this->swap(temp); return *this; } InterleaveInfo::~InterleaveInfo() { if (stride) { free_memory(stride); free_memory(firstnode); 
free_memory(lastnode); free_memory(cellsize); } if (stridedispl) { free_memory(stridedispl); } if (idle) { delete[] nnode; delete[] ncycle; delete[] idle; delete[] cache_access; delete[] child_race; } } void create_interleave_info() { destroy_interleave_info(); interleave_info = new InterleaveInfo[nrn_nthread]; } void destroy_interleave_info() { if (interleave_info) { delete[] interleave_info; interleave_info = nullptr; } } // more precise visualization of the warp quality // can be called after admin2 static void print_quality2(int iwarp, InterleaveInfo& ii, int* p) { int pc = (iwarp == 0); // print warp 0 pc = 0; // turn off printing int nodebegin = ii.lastnode[iwarp]; int* stride = ii.stride + ii.stridedispl[iwarp]; int ncycle = ii.cellsize[iwarp]; int inode = nodebegin; size_t nn = 0; // number of nodes in warp. '.' size_t nx = 0; // number of idle cores on all cycles. 'X' size_t ncacheline = 0; ; // number of parent memory cacheline accesses. // assmue warpsize is max number in a cachline so all o size_t ncr = 0; // number of child race. nchild-1 of same parent in same cycle for (int icycle = 0; icycle < ncycle; ++icycle) { int s = stride[icycle]; int lastp = -2; if (pc) printf(" "); std::set crace; // how many children have same parent in a cycle for (int icore = 0; icore < warpsize; ++icore) { char ch = '.'; if (icore < s) { int par = p[inode]; if (crace.find(par) != crace.end()) { ch = 'r'; ++ncr; } else { crace.insert(par); } if (par != lastp + 1) { ch = (ch == 'r') ? 
'R' : 'o'; ++ncacheline; } lastp = p[inode++]; ++nn; } else { ch = 'X'; ++nx; } if (pc) printf("%c", ch); } if (pc) printf("\n"); } ii.nnode[iwarp] = nn; ii.ncycle[iwarp] = size_t(ncycle); ii.idle[iwarp] = nx; ii.cache_access[iwarp] = ncacheline; ii.child_race[iwarp] = ncr; if (pc) printf("warp %d: %ld nodes, %d cycles, %ld idle, %ld cache access, %ld child races\n", iwarp, nn, ncycle, nx, ncacheline, ncr); } static void print_quality1(int iwarp, InterleaveInfo& ii, int ncell, int* p) { int pc = ((iwarp == 0) || iwarp == (ii.nwarp - 1)); // warp not to skip printing pc = 0; // turn off printing. int* stride = ii.stride; int cellbegin = iwarp * warpsize; int cellend = cellbegin + warpsize; cellend = (cellend < stride[0]) ? cellend : stride[0]; int ncycle = 0; for (int i = cellbegin; i < cellend; ++i) { if (ncycle < ii.cellsize[i]) { ncycle = ii.cellsize[i]; } } nrn_assert(ncycle == ii.cellsize[cellend - 1]); nrn_assert(ncycle <= ii.nstride); int ncell_in_warp = cellend - cellbegin; size_t n = 0; // number of nodes in warp (not including roots) size_t nx = 0; // number of idle cores on all cycles. X size_t ncacheline = 0; ; // number of parent memory cacheline accesses. 
// assume warpsize is max number in a cachline so // first core has all o int inode = ii.firstnode[cellbegin]; for (int icycle = 0; icycle < ncycle; ++icycle) { int sbegin = ncell - stride[icycle] - cellbegin; int lastp = -2; if (pc) printf(" "); for (int icore = 0; icore < warpsize; ++icore) { char ch = '.'; if (icore < ncell_in_warp && icore >= sbegin) { int par = p[inode + icore]; if (par != lastp + 1) { ch = 'o'; ++ncacheline; } lastp = par; ++n; } else { ch = 'X'; ++nx; } if (pc) printf("%c", ch); } if (pc) printf("\n"); inode += ii.stride[icycle + 1]; } ii.nnode[iwarp] = n; ii.ncycle[iwarp] = (size_t) ncycle; ii.idle[iwarp] = nx; ii.cache_access[iwarp] = ncacheline; ii.child_race[iwarp] = 0; if (pc) printf("warp %d: %ld nodes, %d cycles, %ld idle, %ld cache access\n", iwarp, n, ncycle, nx, ncacheline); } static void warp_balance(int ith, InterleaveInfo& ii) { size_t nwarp = size_t(ii.nwarp); size_t smm[4][3]; // sum_min_max see cp below for (size_t j = 0; j < 4; ++j) { smm[j][0] = 0; smm[j][1] = 1000000000; smm[j][2] = 0; } double emax = 0.0, emin = 1.0; for (size_t i = 0; i < nwarp; ++i) { size_t n = ii.nnode[i]; double e = double(n) / (n + ii.idle[i]); if (emax < e) { emax = e; } if (emin > e) { emin = e; } size_t s[4] = {n, ii.idle[i], ii.cache_access[i], ii.child_race[i]}; for (size_t j = 0; j < 4; ++j) { smm[j][0] += s[j]; if (smm[j][1] > s[j]) { smm[j][1] = s[j]; } if (smm[j][2] < s[j]) { smm[j][2] = s[j]; } } } std::vector v(nwarp); for (size_t i = 0; i < nwarp; ++i) { v[i] = ii.ncycle[i]; } double bal = load_balance(v); #ifdef DEBUG printf( "thread %d nwarp=%ld balance=%g warp_efficiency %g to %g\n", ith, nwarp, bal, emin, emax); const char* cp[4] = {"nodes", "idle", "ca", "cr"}; for (size_t i = 0; i < 4; ++i) { printf(" %s=%ld (%ld:%ld)", cp[i], smm[i][0], smm[i][1], smm[i][2]); } printf("\n"); #else (void) bal; // Remove warning about unused #endif } int* interleave_order(int ith, int ncell, int nnode, int* parent) { // return if there are no nodes 
to permute if (nnode <= 0) return nullptr; // ensure parent of root = -1 for (int i = 0; i < ncell; ++i) { if (parent[i] == 0) { parent[i] = -1; } } int nwarp = 0, nstride = 0, *stride = nullptr, *firstnode = nullptr; int *lastnode = nullptr, *cellsize = nullptr, *stridedispl = nullptr; int* order = node_order( ncell, nnode, parent, nwarp, nstride, stride, firstnode, lastnode, cellsize, stridedispl); if (interleave_info) { InterleaveInfo& ii = interleave_info[ith]; ii.nwarp = nwarp; ii.nstride = nstride; ii.stridedispl = stridedispl; ii.stride = stride; ii.firstnode = firstnode; ii.lastnode = lastnode; ii.cellsize = cellsize; if (0 && ith == 0 && interleave_permute_type == 1) { printf("ith=%d nstride=%d ncell=%d nnode=%d\n", ith, nstride, ncell, nnode); for (int i = 0; i < ncell; ++i) { printf("icell=%d cellsize=%d first=%d last=%d\n", i, cellsize[i], firstnode[i], lastnode[i]); } for (int i = 0; i < nstride; ++i) { printf("istride=%d stride=%d\n", i, stride[i]); } } if (ith == 0) { // needed for print_quality[12] and done once here to save time int* p = new int[nnode]; for (int i = 0; i < nnode; ++i) { p[i] = parent[i]; } permute_ptr(p, nnode, order); node_permute(p, nnode, order); ii.nnode = new size_t[nwarp]; ii.ncycle = new size_t[nwarp]; ii.idle = new size_t[nwarp]; ii.cache_access = new size_t[nwarp]; ii.child_race = new size_t[nwarp]; for (int i = 0; i < nwarp; ++i) { if (interleave_permute_type == 1) { print_quality1(i, interleave_info[ith], ncell, p); } if (interleave_permute_type == 2) { print_quality2(i, interleave_info[ith], p); } } delete[] p; warp_balance(ith, interleave_info[ith]); } } return order; } #if INTERLEAVE_DEBUG // only the cell per core style static int** cell_indices_debug(NrnThread& nt, InterleaveInfo& ii) { int ncell = nt.ncell; int nnode = nt.end; int* parents = nt._v_parent_index; // we expect the nodes to be interleave ordered with smallest cell first // establish consistency with ii. 
// first ncell parents are -1 for (int i = 0; i < ncell; ++i) { nrn_assert(parents[i] == -1); } int* sz = new int[ncell]; int* cell = new int[nnode]; for (int i = 0; i < ncell; ++i) { sz[i] = 0; cell[i] = i; } for (int i = ncell; i < nnode; ++i) { cell[i] = cell[parents[i]]; sz[cell[i]] += 1; } // cells are in inceasing sz order; for (int i = 1; i < ncell; ++i) { nrn_assert(sz[i - 1] <= sz[i]); } // same as ii.cellsize for (int i = 0; i < ncell; ++i) { nrn_assert(sz[i] == ii.cellsize[i]); } int** cellindices = new int*[ncell]; for (int i = 0; i < ncell; ++i) { cellindices[i] = new int[sz[i]]; sz[i] = 0; // restart sz counts } for (int i = ncell; i < nnode; ++i) { cellindices[cell[i]][sz[cell[i]]] = i; sz[cell[i]] += 1; } // cellindices first and last same as ii first and last for (int i = 0; i < ncell; ++i) { nrn_assert(cellindices[i][0] == ii.firstnode[i]); nrn_assert(cellindices[i][sz[i] - 1] == ii.lastnode[i]); } delete[] sz; delete[] cell; return cellindices; } static int*** cell_indices_threads; void mk_cell_indices() { cell_indices_threads = new int**[nrn_nthread]; for (int i = 0; i < nrn_nthread; ++i) { NrnThread& nt = nrn_threads[i]; if (nt.ncell) { cell_indices_threads[i] = cell_indices_debug(nt, interleave_info[i]); } else { cell_indices_threads[i] = nullptr; } } } #endif // INTERLEAVE_DEBUG #define GPU_V(i) nt->_actual_v[i] #define GPU_A(i) nt->_actual_a[i] #define GPU_B(i) nt->_actual_b[i] #define GPU_D(i) nt->_actual_d[i] #define GPU_RHS(i) nt->_actual_rhs[i] #define GPU_PARENT(i) nt->_v_parent_index[i] // How does the interleaved permutation with stride get used in // triagularization? 
// each cell in parallel regardless of inhomogeneous topology static void triang_interleaved(NrnThread* nt, int icell, int icellsize, int nstride, int* stride, int* lastnode) { int i = lastnode[icell]; for (int istride = nstride - 1; istride >= 0; --istride) { if (istride < icellsize) { // only first icellsize strides matter // what is the index int ip = GPU_PARENT(i); #ifndef CORENEURON_ENABLE_GPU nrn_assert(ip >= 0); // if (ip < 0) return; #endif double p = GPU_A(i) / GPU_D(i); GPU_D(ip) -= p * GPU_B(i); GPU_RHS(ip) -= p * GPU_RHS(i); i -= stride[istride]; } } } // back substitution? static void bksub_interleaved(NrnThread* nt, int icell, int icellsize, int /* nstride */, int* stride, int* firstnode) { int i = firstnode[icell]; GPU_RHS(icell) /= GPU_D(icell); // the root for (int istride = 0; istride < icellsize; ++istride) { int ip = GPU_PARENT(i); #ifndef CORENEURON_ENABLE_GPU nrn_assert(ip >= 0); #endif GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip); GPU_RHS(i) /= GPU_D(i); i += stride[istride + 1]; } } // icore ranges [0:warpsize) ; stride[ncycle] nrn_pragma_acc(routine vector) static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* stride, int lastnode) { int icycle = ncycle - 1; int istride = stride[icycle]; int i = lastnode - istride + icore; int ii = i; // execute until all tree depths are executed bool has_subtrees_to_compute = true; // clang-format off nrn_pragma_acc(loop seq) for (; has_subtrees_to_compute; ) { // ncycle loop // serial test, gpu does this in parallel nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; if (icore < istride) { // most efficient if istride equal warpsize // what is the index int ip = GPU_PARENT(i); double p = GPU_A(i) / GPU_D(i); nrn_pragma_acc(atomic update) nrn_pragma_omp(atomic update) GPU_D(ip) -= p * GPU_B(i); nrn_pragma_acc(atomic update) nrn_pragma_omp(atomic update) GPU_RHS(ip) -= p * GPU_RHS(i); } } // if finished with all 
tree depths then ready to break // (note that break is not allowed in OpenACC) if (icycle == 0) { has_subtrees_to_compute = false; continue; } --icycle; istride = stride[icycle]; i -= istride; ii -= istride; } } // icore ranges [0:warpsize) ; stride[ncycle] nrn_pragma_acc(routine vector) static void bksub_interleaved2(NrnThread* nt, int root, int lastroot, int icore, int ncycle, int* stride, int firstnode) { nrn_pragma_acc(loop seq) for (int i = root; i < lastroot; i += 1) { GPU_RHS(i) /= GPU_D(i); // the root } int i = firstnode + icore; int ii = i; nrn_pragma_acc(loop seq) for (int icycle = 0; icycle < ncycle; ++icycle) { int istride = stride[icycle]; // serial test, gpu does this in parallel nrn_pragma_acc(loop vector) nrn_pragma_omp(loop bind(parallel)) for (int icore = 0; icore < warpsize; ++icore) { int i = ii + icore; if (icore < istride) { int ip = GPU_PARENT(i); GPU_RHS(i) -= GPU_B(i) * GPU_RHS(ip); GPU_RHS(i) /= GPU_D(i); } i += istride; } ii += istride; } } /** * \brief Solve Hines matrices/cells with compartment-based granularity. * * The node ordering/permuation guarantees cell interleaving (as much coalesced memory access as * possible) and balanced warps (through the use of lpt algorithm to define the groups/warps). Every * warp deals with a group of cells, therefore multiple compartments (finer level of parallelism). 
// Solve for thread `ith` using the per-warp data stored in interleave_info[ith].
// When built with _OPENACC and both corenrn_param.gpu and
// corenrn_param.cuda_interface are set, the work is handed to
// solve_interleaved2_launcher; otherwise the loop below runs one iteration per
// warp, calling triang_interleaved2 followed by bksub_interleaved2.
void solve_interleaved2(int ith) {
    NrnThread* nt = nrn_threads + ith;
    InterleaveInfo& ii = interleave_info[ith];
    int nwarp = ii.nwarp;
    // nothing to solve for this thread
    if (nwarp == 0)
        return;

    int ncore = nwarp * warpsize;

#ifdef _OPENACC
    if (corenrn_param.gpu && corenrn_param.cuda_interface) {
        // pass the device addresses of the thread and its interleave data
        auto* d_nt = static_cast(acc_deviceptr(nt));
        auto* d_info = static_cast(acc_deviceptr(interleave_info + ith));
        solve_interleaved2_launcher(d_nt, d_info, ncore, acc_get_cuda_stream(nt->stream_id));
    } else {
#endif
        int* ncycles = ii.cellsize;         // nwarp of these
        int* stridedispl = ii.stridedispl;  // nwarp+1 of these
        int* strides = ii.stride;  // sum ncycles of these (bad since ncompart/warpsize)
        int* rootbegin = ii.firstnode;  // nwarp+1 of these
        int* nodebegin = ii.lastnode;   // nwarp+1 of these
        // nstride is only referenced by the OpenACC present() clause below, so
        // it is only declared for that build configuration.
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \
    defined(_OPENACC)
        int nstride = stridedispl[nwarp];
#endif
        /* If we compare this loop with the one from cellorder.cu (CUDA version), we will
         * understand that the parallelism here is exposed in steps, while in the CUDA version all
         * the parallelism is exposed from the very beginning of the loop. In more details, here we
         * initially distribute the outermost loop, e.g. in the CUDA blocks, and for the innermost
         * loops we explicitly use multiple threads for the parallelization (see for example the
         * loop directives in triang/bksub_interleaved2). On the other hand, in the CUDA version
         * the outermost loop is distributed to all the available threads, and therefore there is
         * no need to have the innermost loops. Here, the loop/icore jumps every warpsize, while in
         * the CUDA version the icore increases by one. Other than this, the two loop versions are
         * equivalent (same results).
         */
        nrn_pragma_acc(parallel loop gang present(nt [0:1],
                                                  strides [0:nstride],
                                                  ncycles [0:nwarp],
                                                  stridedispl [0:nwarp + 1],
                                                  rootbegin [0:nwarp + 1],
                                                  nodebegin [0:nwarp + 1])
                           if (nt->compute_gpu) async(nt->stream_id))
        nrn_pragma_omp(target teams loop if(nt->compute_gpu))
        // icore advances by warpsize, so each iteration handles one warp
        for (int icore = 0; icore < ncore; icore += warpsize) {
            int iwarp = icore / warpsize;     // figure out the >> value
            int ic = icore & (warpsize - 1);  // figure out the & mask
            int ncycle = ncycles[iwarp];
            // strides belonging to this warp
            int* stride = strides + stridedispl[iwarp];
            int root = rootbegin[iwarp];  // cell ID -> [0, ncell)
            int lastroot = rootbegin[iwarp + 1];
            int firstnode = nodebegin[iwarp];
            int lastnode = nodebegin[iwarp + 1];

            triang_interleaved2(nt, ic, ncycle, stride, lastnode);
            bksub_interleaved2(nt, root + ic, lastroot, ic, ncycle, stride, firstnode);
        }
        // block until the asynchronous region on this stream has completed
        nrn_pragma_acc(wait(nt->stream_id))
#ifdef _OPENACC
    }
#endif
}
nrn_pragma_acc(parallel loop present(nt [0:1], stride [0:nstride], firstnode [0:ncell], lastnode [0:ncell], cellsize [0:ncell]) if (nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)) for (int icell = 0; icell < ncell; ++icell) { int icellsize = cellsize[icell]; triang_interleaved(nt, icell, icellsize, nstride, stride, lastnode); bksub_interleaved(nt, icell, icellsize, nstride, stride, firstnode); } nrn_pragma_acc(wait(nt->stream_id)) } void solve_interleaved(int ith) { if (interleave_permute_type != 1) { solve_interleaved2(ith); } else { solve_interleaved1(ith); } } } // namespace coreneuron ================================================ FILE: coreneuron/permute/cellorder.cu ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/utils/utils_cuda.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/network/tnode.hpp" #include "coreneuron/sim/multicore.hpp" namespace coreneuron { __device__ void triang_interleaved2_device(NrnThread* nt, int icore, int ncycle, int* stride, int lastnode) { int icycle = ncycle - 1; int istride = stride[icycle]; int i = lastnode - istride + icore; int ip; double p; while (icycle >= 0) { // most efficient if istride equal warpsize, else branch divergence! 
if (icore < istride) { ip = nt->_v_parent_index[i]; p = nt->_actual_a[i] / nt->_actual_d[i]; atomicAdd(&nt->_actual_d[ip], -p * nt->_actual_b[i]); atomicAdd(&nt->_actual_rhs[ip], -p * nt->_actual_rhs[i]); } --icycle; istride = stride[icycle]; i -= istride; } } __device__ void bksub_interleaved2_device(NrnThread* nt, int root, int lastroot, int icore, int ncycle, int* stride, int firstnode) { for (int i = root; i < lastroot; i += warpsize) { nt->_actual_rhs[i] /= nt->_actual_d[i]; // the root } int i = firstnode + icore; int ip; for (int icycle = 0; icycle < ncycle; ++icycle) { int istride = stride[icycle]; if (icore < istride) { ip = nt->_v_parent_index[i]; nt->_actual_rhs[i] -= nt->_actual_b[i] * nt->_actual_rhs[ip]; nt->_actual_rhs[i] /= nt->_actual_d[i]; } i += istride; } } __global__ void solve_interleaved2_kernel(NrnThread* nt, InterleaveInfo* ii, int ncore) { int icore = blockDim.x * blockIdx.x + threadIdx.x; int* ncycles = ii->cellsize; // nwarp of these int* stridedispl = ii->stridedispl; // nwarp+1 of these int* strides = ii->stride; // sum ncycles of these (bad since ncompart/warpsize) int* rootbegin = ii->firstnode; // nwarp+1 of these int* nodebegin = ii->lastnode; // nwarp+1 of these while (icore < ncore) { int iwarp = icore / warpsize; // figure out the >> value int ic = icore & (warpsize - 1); // figure out the & mask int ncycle = ncycles[iwarp]; int* stride = strides + stridedispl[iwarp]; int root = rootbegin[iwarp]; int lastroot = rootbegin[iwarp + 1]; int firstnode = nodebegin[iwarp]; int lastnode = nodebegin[iwarp + 1]; triang_interleaved2_device(nt, ic, ncycle, stride, lastnode); bksub_interleaved2_device(nt, root + ic, lastroot, ic, ncycle, stride, firstnode); icore += blockDim.x * gridDim.x; } } void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream) { auto cuda_stream = static_cast(stream); /// the selection of these parameters has been done after running the channel-benchmark for /// typical production 
runs, i.e. 1 MPI task with 1440 cells & 6 MPI tasks with 8800 cells. /// In the OpenACC/OpenMP implementations threadsPerBlock is set to 32. From profiling the /// channel-benchmark circuits mentioned above we figured out that the best performance was /// achieved with this configuration int threadsPerBlock = warpsize; /// Max number of blocksPerGrid for NVIDIA GPUs is 65535, so we need to make sure that the /// blocksPerGrid we launch the CUDA kernel with doesn't exceed this number const auto maxBlocksPerGrid = 65535; int provisionalBlocksPerGrid = (ncore + threadsPerBlock - 1) / threadsPerBlock; int blocksPerGrid = provisionalBlocksPerGrid <= maxBlocksPerGrid ? provisionalBlocksPerGrid : maxBlocksPerGrid; solve_interleaved2_kernel<<>>(nt, info, ncore); cudaStreamSynchronize(cuda_stream); CHECKLAST("solve_interleaved2_launcher"); } } // namespace coreneuron ================================================ FILE: coreneuron/permute/cellorder.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/utils/memory.h" #include namespace coreneuron { /** * \brief Function that performs the permutation of the cells such that the * execution threads access coalesced memory. * * \param ith NrnThread to access * \param ncell number of cells in NrnThread * \param nnode number of compartments in the ncells * \param parent parent indices of cells * * \return int* order, interleaved order of the cells */ int* interleave_order(int ith, int ncell, int nnode, int* parent); void create_interleave_info(); void destroy_interleave_info(); /** * * \brief Solve the Hines matrices based on the interleave_permute_type (1 or 2). 
*
 * For interleave_permute_type == 1 : Naive interleaving -> Each execution thread deals with one
 * Hines matrix (cell)
 * For interleave_permute_type == 2 : Advanced interleaving -> Each Hines matrix
 * is solved by multiple execution threads (with coalesced memory access as well)
 *
 * \param ith index of the NrnThread (and of its InterleaveInfo) to solve
 */
extern void solve_interleaved(int ith);

class InterleaveInfo;  // forward declaration

/**
 *
 * \brief CUDA branch of the solve_interleaved with interleave_permute_type == 2.
 *
 * This branch is activated in runtime with the --cuda-interface CLI flag
 *
 * \param nt     NrnThread to solve. NOTE(review): the caller in cellorder.cpp passes the
 *               device address obtained with acc_deviceptr -- confirm this is always the case
 * \param info   InterleaveInfo of that thread (same device-address remark applies)
 * \param ncore  number of cores to cover, computed by the caller as nwarp * warpsize
 * \param stream CUDA stream handle passed as an opaque pointer
 */
void solve_interleaved2_launcher(NrnThread* nt, InterleaveInfo* info, int ncore, void* stream);

/**
 * \brief Per-NrnThread bookkeeping arrays used by the interleaved Hines solvers.
 *
 * The same pointer members are used by both permutation strategies; the comments on
 * each member give the meaning they take for interleave_permute_type == 2 ("interleave2").
 * Copy construction, copy assignment and destruction are user-declared; they are defined
 * outside of this header.
 */
class InterleaveInfo: public MemoryManaged {
  public:
    InterleaveInfo() = default;
    InterleaveInfo(const InterleaveInfo&);
    InterleaveInfo& operator=(const InterleaveInfo&);
    ~InterleaveInfo();
    int nwarp = 0;  // used only by interleave2
    int nstride = 0;
    int* stridedispl = nullptr;  // interleave2: nwarp+1
    int* stride = nullptr;       // interleave2: stride length is stridedispl[nwarp]
    int* firstnode = nullptr;    // interleave2: rootbegin nwarp+1 displacements
    int* lastnode = nullptr;     // interleave2: nodebegin nwarp+1 displacements
    int* cellsize = nullptr;     // interleave2: ncycles nwarp

    // statistics (nwarp of each)
    size_t* nnode = nullptr;
    size_t* ncycle = nullptr;
    size_t* idle = nullptr;
    size_t* cache_access = nullptr;
    size_t* child_race = nullptr;

  private:
    // NOTE(review): presumably the helper behind copy assignment -- confirm in cellorder.cpp
    void swap(InterleaveInfo& info);
};

/**
 * \brief Function that returns a permutation of length nnode.
*
 * There are two permutation strategies:
 * For interleave_permute_type == 1 : Naive interleaving -> Each execution thread deals with one
 * Hines matrix (cell).
 * For interleave_permute_type == 2 : Advanced interleaving -> Each Hines matrix
 * is solved by multiple execution threads (with coalesced memory access as well).
 *
 * The descriptions of nstride, stride, firstnode, lastnode and cellsize below apply to
 * interleave_permute_type == 1. For interleave_permute_type == 2 the same arguments carry
 * the per-warp data instead: firstnode is rootbegin, lastnode is nodebegin, cellsize is
 * ncycles, and nstride is the sum of ncycles over all warps.
 *
 * \param ncell number of cells
 * \param nnode number of compartments in the ncells
 * \param parents parent indices of the cells
 * \param nwarp number of warps
 * \param nstride nstride is the maximum cell size (not counting root)
 * \param stride stride[i] is the number of cells with an ith node:
 *               using stride[i] we know how many positions to move in order to
 *               access the next element of the same cell (given that the cells are
 *               ordered with the treenode_order).
 * \param firstnode firstnode[i] is the index of the first nonroot node of the cell
 * \param lastnode lastnode[i] is the index of the last node of the cell
 * \param cellsize cellsize is the number of nodes in the cell not counting root.
* \param stridedispl * \return int* : a permutation of length nnode */ int* node_order(int ncell, int nnode, int* parents, int& nwarp, int& nstride, int*& stride, int*& firstnode, int*& lastnode, int*& cellsize, int*& stridedispl); // copy src array to dest with new allocation template void copy_array(T*& dest, T* src, size_t n) { dest = new T[n]; std::copy(src, src + n, dest); } // copy src array to dest with NRN_SOA_BYTE_ALIGN ecalloc_align allocation template void copy_align_array(T*& dest, T* src, size_t n) { dest = static_cast(ecalloc_align(n, sizeof(T))); std::copy(src, src + n, dest); } #ifndef INTERLEAVE_DEBUG #define INTERLEAVE_DEBUG 0 #endif #if INTERLEAVE_DEBUG void mk_cell_indices(); #endif } // namespace coreneuron ================================================ FILE: coreneuron/permute/cellorder1.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include #include #include #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/network/tnode.hpp" // just for interleave_permute_type #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/utils/memory.h" namespace coreneuron { static size_t groupsize = 32; /** * \brief Function to order trees by size, hash and nodeindex */ static bool tnode_earlier(TNode* a, TNode* b) { bool result = false; if (a->treesize < b->treesize) { // treesize dominates result = true; } else if (a->treesize == b->treesize) { if (a->hash < b->hash) { // if treesize same, keep identical trees together result = true; } else if (a->hash == b->hash) { result = a->nodeindex < b->nodeindex; // identical trees ordered by nodeindex } } return result; } static bool ptr_tnode_earlier(TNode* a, TNode* b) { return tnode_earlier(a, b); } TNode::TNode(int ix) { nodeindex = ix; cellindex = 0; groupindex = 0; level = 0; hash = 0; treesize = 1; nodevec_index = 0; treenode_order = 0; parent = nullptr; children.reserve(2); } TNode::~TNode() {} size_t TNode::mkhash() { // call on all nodes in leaf to root order // concept from http://stackoverflow.com/questions/20511347/a-good-hash-function-for-a-vector std::sort(children.begin(), children.end(), ptr_tnode_earlier); hash = children.size(); treesize = 1; for (size_t i = 0; i < children.size(); ++i) { // need sorted by child hash hash ^= children[i]->hash + 0x9e3779b9 + (hash << 6) + (hash >> 2); treesize += children[i]->treesize; } return hash; // hash of leaf nodes is 0 } static void tree_analysis(int* parent, int nnode, int ncell, VecTNode&); static void node_interleave_order(int ncell, VecTNode&); static void admin1(int ncell, VecTNode& nodevec, int& nwarp, int& nstride, int*& stride, int*& firstnode, int*& lastnode, int*& cellsize); static void admin2(int ncell, VecTNode& nodevec, int& nwarp, int& 
nstride, int*& stridedispl, int*& strides, int*& rootbegin, int*& nodebegin, int*& ncycles); static void check(VecTNode&); #if CORENRN_DEBUG static void prtree(VecTNode&); #endif using TNI = std::pair; using HashCnt = std::map>; using TNIVec = std::vector; /* assess the quality of the ordering. The measure is the size of a contiguous list of nodes whose parents have the same order. How many contiguous lists have that same size. How many nodes participate in that size list. Modify the quality measure from experience with performance. Start with list of (nnode, size_participation) */ static void quality(VecTNode& nodevec, size_t max = 32) { size_t qcnt = 0; // how many contiguous nodes have contiguous parents // first ncell nodes are by definition in contiguous order for (const auto& n: nodevec) { if (n->parent != nullptr) { break; } qcnt += 1; } size_t ncell = qcnt; // key is how many parents in contiguous order // value is number of nodes that participate in that std::map qual; size_t ip_last = 10000000000; for (size_t i = ncell; i < nodevec.size(); ++i) { size_t ip = nodevec[i]->parent->nodevec_index; // i%max == 0 means that if we start a warp with 8 and then have 32 // the 32 is broken into 24 and 8. (modify if the arrangement during // gaussian elimination becomes more sophisticated.( if (ip == ip_last + 1 && i % max != 0) { // contiguous qcnt += 1; } else { if (qcnt == 1) { // printf("unique %ld p=%ld ix=%d\n", i, ip, nodevec[i]->nodeindex); } qual[max] += (qcnt / max) * max; size_t x = qcnt % max; if (x) { qual[x] += x; } qcnt = 1; } ip_last = ip; } qual[max] += (qcnt / max) * max; size_t x = qcnt % max; if (x) { qual[x] += x; } // print result qcnt = 0; #if CORENRN_DEBUG for (const auto& q: qual) { qcnt += q.second; printf("%6ld %6ld\n", q.first, q.second); } #endif #if CORENRN_DEBUG printf("qual.size=%ld qual total nodes=%ld nodevec.size=%ld\n", qual.size(), qcnt, nodevec.size()); #endif // how many race conditions. 
ie refer to same parent on different core // of warp (max cores) or parent in same group of max. size_t maxip = ncell; size_t nrace1 = 0; size_t nrace2 = 0; std::set ipused; for (size_t i = ncell; i < nodevec.size(); ++i) { TNode* nd = nodevec[i]; size_t ip = nd->parent->nodevec_index; if (i % max == 0) { maxip = i; ipused.clear(); } if (ip >= maxip) { nrace1 += 1; } /*else*/ { if (ipused.find(ip) != ipused.end()) { nrace2 += 1; if (ip >= maxip) { // printf("race for parent %ld (parent in same group as multiple users))\n", // ip); } } else { ipused.insert(ip); } } } static_cast(nrace1); static_cast(nrace2); #if CORENRN_DEBUG printf("nrace = %ld (parent in same group of %ld nodes)\n", nrace1, max); printf("nrace = %ld (parent used more than once by same group of %ld nodes)\n", nrace2, max); #endif } size_t level_from_root(VecTNode& nodevec) { size_t maxlevel = 0; for (auto& nd: nodevec) { if (nd->parent) { nd->level = nd->parent->level + 1; if (maxlevel < nd->level) { maxlevel = nd->level; } } else { nd->level = 0; } } return maxlevel; } size_t level_from_leaf(VecTNode& nodevec) { size_t maxlevel = 0; for (size_t i = nodevec.size() - 1; true; --i) { TNode* nd = nodevec[i]; size_t lmax = 0; for (auto& child: nd->children) { if (lmax <= child->level) { lmax = child->level + 1; } } nd->level = lmax; if (maxlevel < lmax) { maxlevel = lmax; } if (i == 0) { break; } } return maxlevel; } /** * \brief Set the cellindex to distinguish the different cells. */ static void set_cellindex(int ncell, VecTNode& nodevec) { for (int i = 0; i < ncell; ++i) { nodevec[i]->cellindex = i; } for (size_t i = 0; i < nodevec.size(); ++i) { TNode& nd = *nodevec[i]; for (size_t j = 0; j < nd.children.size(); ++j) { TNode* cnode = nd.children[j]; cnode->cellindex = nd.cellindex; } } } /** * \brief Initialization of the groupindex (groups) * * The cells are groupped at a later stage based on a load balancing algorithm. * This is just an initialization function. 
*/ static void set_groupindex(VecTNode& nodevec) { for (size_t i = 0; i < nodevec.size(); ++i) { TNode* nd = nodevec[i]; if (nd->parent) { nd->groupindex = nd->parent->groupindex; } else { nd->groupindex = i / groupsize; } } } // how many identical trees and their levels // print when more than one instance of a type // reverse the sense of levels (all leaves are level 0) to get a good // idea of the depth of identical subtrees. static void ident_statistic(VecTNode& nodevec, size_t ncell) { // reverse sense of levels // size_t maxlevel = level_from_leaf(nodevec); size_t maxlevel = level_from_root(nodevec); // # in each level std::vector> n_in_level(maxlevel + 1); for (auto& n: n_in_level) { n.resize(ncell / groupsize); } for (const auto& n: nodevec) { n_in_level[n->level][n->groupindex]++; } printf("n_in_level.size = %ld\n", n_in_level.size()); for (size_t i = 0; i < n_in_level.size(); ++i) { printf("%5ld\n", i); for (const auto& n: n_in_level[i]) { printf(" %5ld", n); } printf("\n"); } } #undef MSS int* node_order(int ncell, int nnode, int* parent, int& nwarp, int& nstride, int*& stride, int*& firstnode, int*& lastnode, int*& cellsize, int*& stridedispl) { VecTNode nodevec; // nodevec[0:ncell] in increasing size, with identical trees together, // and otherwise nodeindex order // nodevec.size = nnode tree_analysis(parent, nnode, ncell, nodevec); check(nodevec); set_cellindex(ncell, nodevec); set_groupindex(nodevec); level_from_root(nodevec); // nodevec[ncell:nnode] cells are interleaved in nodevec[0:ncell] cell order if (interleave_permute_type == 1) { node_interleave_order(ncell, nodevec); } else { group_order2(nodevec, groupsize, ncell); } check(nodevec); #if CORENRN_DEBUG for (int i = 0; i < ncell; ++i) { TNode& nd = *nodevec[i]; printf("%d size=%ld hash=%ld ix=%d\n", i, nd.treesize, nd.hash, nd.nodeindex); } #endif if (0) ident_statistic(nodevec, ncell); quality(nodevec); // the permutation int* nodeorder = new int[nnode]; for (int i = 0; i < nnode; ++i) { 
TNode& nd = *nodevec[i]; nodeorder[nd.nodeindex] = i; } // administrative statistics for gauss elimination if (interleave_permute_type == 1) { admin1(ncell, nodevec, nwarp, nstride, stride, firstnode, lastnode, cellsize); } else { // admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, rootbegin, nodebegin, // ncycles); admin2(ncell, nodevec, nwarp, nstride, stridedispl, stride, firstnode, lastnode, cellsize); } int ntopol = 1; for (int i = 1; i < ncell; ++i) { if (nodevec[i - 1]->hash != nodevec[i]->hash) { ntopol += 1; } } static_cast(ntopol); #ifdef DEBUG printf("%d distinct tree topologies\n", ntopol); #endif for (size_t i = 0; i < nodevec.size(); ++i) { delete nodevec[i]; } return nodeorder; } void check(VecTNode& nodevec) { // printf("check\n"); size_t nnode = nodevec.size(); size_t ncell = 0; for (size_t i = 0; i < nnode; ++i) { nodevec[i]->nodevec_index = i; if (nodevec[i]->parent == nullptr) { ncell++; } } /// Check that the first compartments of nodevec are the root nodes (cells) for (size_t i = 0; i < ncell; ++i) { nrn_assert(nodevec[i]->parent == nullptr); } for (size_t i = ncell; i < nnode; ++i) { TNode& nd = *nodevec[i]; if (nd.parent->nodevec_index >= nd.nodevec_index) { printf("error i=%ld nodevec_index=%ld parent=%ld\n", i, nd.nodevec_index, nd.parent->nodevec_index); } nrn_assert(nd.nodevec_index > nd.parent->nodevec_index); } } #if CORENRN_DEBUG void prtree(VecTNode& nodevec) { size_t nnode = nodevec.size(); for (size_t i = 0; i < nnode; ++i) { nodevec[i]->nodevec_index = i; } for (size_t i = 0; i < nnode; ++i) { TNode& nd = *nodevec[i]; printf("%ld p=%d c=%ld l=%ld o=%ld ix=%d pix=%d\n", i, nd.parent ? int(nd.parent->nodevec_index) : -1, nd.cellindex, nd.level, nd.treenode_order, nd.nodeindex, nd.parent ? 
int(nd.parent->nodeindex) : -1); } } #endif /** * \brief Perform tree preparation for interleaving strategies * * \param parent vector of parent indices * \param nnode number of compartments in the cells * \param ncell number of cells */ void tree_analysis(int* parent, int nnode, int ncell, VecTNode& nodevec) { // create empty TNodes (knowing only their index) nodevec.reserve(nnode); for (int i = 0; i < nnode; ++i) { nodevec.push_back(new TNode(i)); } // determine the (sorted by hash) children of each node for (int i = nnode - 1; i >= ncell; --i) { nodevec[i]->parent = nodevec[parent[i]]; nodevec[i]->mkhash(); nodevec[parent[i]]->children.push_back(nodevec[i]); } // determine hash of the cells for (int i = 0; i < ncell; ++i) { nodevec[i]->mkhash(); } // sort it by tree size (from smaller to larger) std::sort(nodevec.begin(), nodevec.begin() + ncell, tnode_earlier); } static bool interleave_comp(TNode* a, TNode* b) { bool result = false; if (a->treenode_order < b->treenode_order) { result = true; } else if (a->treenode_order == b->treenode_order) { if (a->cellindex < b->cellindex) { result = true; } } return result; } /** * \brief Naive interleaving strategy (interleave_permute_type == 1) * * Sort so nodevec[ncell:nnode] cell instances are interleaved. Keep the * secondary ordering with respect to treenode_order so each cell is still a tree. 
* * \param ncell number of cells (trees) * \param nodevec vector that contains compartments (nodes of the trees) */ void node_interleave_order(int ncell, VecTNode& nodevec) { int* order = new int[ncell]; for (int i = 0; i < ncell; ++i) { order[i] = 0; nodevec[i]->treenode_order = order[i]++; } for (size_t i = 0; i < nodevec.size(); ++i) { TNode& nd = *nodevec[i]; for (size_t j = 0; j < nd.children.size(); ++j) { TNode* cnode = nd.children[j]; cnode->treenode_order = order[nd.cellindex]++; } } delete[] order; // std::sort(nodevec.begin() + ncell, nodevec.end(), contig_comp); // Traversal of nodevec: From root to leaves (this is why we compute the tree node order) std::sort(nodevec.begin() + ncell, nodevec.end(), interleave_comp); #if CORENRN_DEBUG for (size_t i = 0; i < nodevec.size(); ++i) { TNode& nd = *nodevec[i]; printf("%ld cell=%ld ix=%d\n", i, nd.cellindex, nd.nodeindex); } #endif } static void admin1(int ncell, VecTNode& nodevec, int& nwarp, int& nstride, int*& stride, int*& firstnode, int*& lastnode, int*& cellsize) { firstnode = (int*) ecalloc_align(ncell, sizeof(int)); lastnode = (int*) ecalloc_align(ncell, sizeof(int)); cellsize = (int*) ecalloc_align(ncell, sizeof(int)); nwarp = (ncell % warpsize == 0) ? 
(ncell / warpsize) : (ncell / warpsize + 1); for (int i = 0; i < ncell; ++i) { firstnode[i] = -1; lastnode[i] = -1; cellsize[i] = 0; } nstride = 0; for (size_t i = ncell; i < nodevec.size(); ++i) { TNode& nd = *nodevec[i]; size_t ci = nd.cellindex; if (firstnode[ci] == -1) { firstnode[ci] = i; } lastnode[ci] = i; cellsize[ci] += 1; if (nstride < cellsize[ci]) { nstride = cellsize[ci]; } } // this vector is used to move from one compartment to the other (per cell) // its length is equal to the cell with the highest number of compartments stride = static_cast(ecalloc_align(nstride + 1, sizeof(int))); for (size_t i = ncell; i < nodevec.size(); ++i) { TNode& nd = *nodevec[i]; // compute how many compartments with the same order // treenode_order : defined in breadth first fashion (for each cell separately) stride[nd.treenode_order - 1] += 1; // -1 because treenode order includes root } } // for admin2 we allow the node organisation in warps of (say 4 cores per warp) // ............... ideal warp but unbalanced relative to warp with max cycles // ............... ncycle = 15, icore [0:4), all strides are 4. // ............... // ............... // // .......... unbalanced relative to warp with max cycles // .......... ncycle = 10, not all strides the same because // .......... of need to avoid occasional race conditions. // . . .. icore [4:8) only 4 strides of 4 // // .................... ncycle = 20, uses only one core in the warp (cable) // icore 8, all ncycle strides are 1 // One thing to be unhappy about is the large stride vector of size about // number of compartments/warpsize. There are a lot of models where the // stride for a warp is constant except for one cycle in the warp and that // is easy to obtain when there are more than warpsize cells per warp. static size_t stride_length(size_t begin, size_t end, VecTNode& nodevec) { // return stride length starting at i. Do not go past j. // max stride is warpsize. 
// At this time, only assume vicious parent race conditions matter. if (end - begin > warpsize) { end = begin + warpsize; } for (size_t i = begin; i < end; ++i) { TNode* nd = nodevec[i]; nrn_assert(nd->nodevec_index == i); size_t diff = dist2child(nd); if (i + diff < end) { end = i + diff; } } return end - begin; } /** * \brief Prepare for solve_interleaved2 * * One group of cells per warp. * * warp[i] has a number of compute cycles (ncycle[i]) * the index of its first root (rootbegin[i], last rootbegin[nwarp] = ncell) * the index of its first node (nodebegin[i], last nodebegin[nwarp] = nnode) * * Each compute cycle has a stride * A stride is how many nodes are processed by a warp in one compute cycle * There are nstride strides. nstride is the sum of ncycles of all warps. * warp[i] has ncycle[i] strides * same as sum of ncycle * warp[i] has a stridedispl[i] which is stridedispl[i-1] + ncycle[i]. * ie. The zeroth cycle of warp[j] works on stride[stridedispl[j]] * The value of a stride beginning at node i (node i is computed by core 0 of * some warp for some cycle) is determined by stride_length(i, j, nodevec) * */ static void admin2(int ncell, VecTNode& nodevec, int& nwarp, int& nstride, int*& stridedispl, int*& strides, int*& rootbegin, int*& nodebegin, int*& ncycles) { // the number of groups is the number of warps needed // ncore is the number of warps * warpsize nwarp = nodevec[ncell - 1]->groupindex + 1; ncycles = (int*) ecalloc_align(nwarp, sizeof(int)); stridedispl = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // running sum of ncycles (start at 0) rootbegin = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // index (+1) of first root in warp. nodebegin = (int*) ecalloc_align(nwarp + 1, sizeof(int)); // index (+1) of first node in warp. 
// rootbegin and nodebegin are the root index values + 1 of the last of // the sequence of constant groupindex rootbegin[0] = 0; for (size_t i = 0; i < size_t(ncell); ++i) { rootbegin[nodevec[i]->groupindex + 1] = i + 1; } nodebegin[0] = ncell; // We start from the leaves and go backwards towards the root for (size_t i = size_t(ncell); i < nodevec.size(); ++i) { nodebegin[nodevec[i]->groupindex + 1] = i + 1; } // ncycles, stridedispl, and nstride nstride = 0; stridedispl[0] = 0; for (size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) { size_t j = size_t(nodebegin[iwarp + 1]); int nc = 0; size_t i = nodebegin[iwarp]; // in this loop we traverse all the children of all the cells in the current warp (iwarp) while (i < j) { i += stride_length(i, j, nodevec); ++nc; // how many times the warp should loop in order to finish with all the tree // depths (for all the trees of the warp/group) } ncycles[iwarp] = nc; stridedispl[iwarp + 1] = stridedispl[iwarp] + nc; nstride += nc; } // strides strides = (int*) ecalloc_align(nstride, sizeof(int)); nstride = 0; for (size_t iwarp = 0; iwarp < (size_t) nwarp; ++iwarp) { size_t j = size_t(nodebegin[iwarp + 1]); size_t i = nodebegin[iwarp]; while (i < j) { int k = stride_length(i, j, nodevec); i += k; strides[nstride++] = k; } } #if CORENRN_DEBUG printf("warp rootbegin nodebegin stridedispl\n"); for (int i = 0; i <= nwarp; ++i) { printf("%4d %4d %4d %4d\n", i, rootbegin[i], nodebegin[i], stridedispl[i]); } #endif } } // namespace coreneuron ================================================ FILE: coreneuron/permute/cellorder2.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include #include #include #include #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/network/tnode.hpp" #include "coreneuron/nrniv/nrniv_decl.h" // experiment starting with identical cell ordering // groupindex aleady defined that keeps identical cells together // begin with leaf to root ordering namespace coreneuron { using VTN = VecTNode; // level of nodes using VVTN = std::vector; // group of levels using VVVTN = std::vector; // groups // verify level in groups of nident identical nodes void chklevel(VTN& level, size_t nident = 8) {} // first child before second child, etc // if same parent level, then parent order // if not same parent, then earlier parent (no parent earlier than parent) // if same parents, then children order // if no parents then nodevec_index order. static bool sortlevel_cmp(TNode* a, TNode* b) { // when starting with leaf to root order // note that leaves are at max level and all roots at level 0 bool result = false; // since cannot have an index < 0, just add 1 to level size_t palevel = a->parent ? 1 + a->parent->level : 0; size_t pblevel = b->parent ? 
1 + b->parent->level : 0; if (palevel < pblevel) { // only used when starting leaf to root order result = true; // earlier level first } else if (palevel == pblevel) { // always true when starting root to leaf if (palevel == 0) { // a and b are roots if (a->nodevec_index < b->nodevec_index) { result = true; } } else { // parent order (already sorted with proper treenode_order) if (a->treenode_order < b->treenode_order) { // children order result = true; } else if (a->treenode_order == b->treenode_order) { if (a->parent->treenode_order < b->parent->treenode_order) { result = true; } } } } return result; } static void sortlevel(VTN& level) { std::sort(level.begin(), level.end(), sortlevel_cmp); for (size_t i = 0; i < level.size(); ++i) { level[i]->treenode_order = i; } } // TODO: refactor since sortlevel() is traversing the nodes in same order static void set_treenode_order(VVTN& levels) { size_t order = 0; for (auto& level: levels) { for (auto* nd: level) { nd->treenode_order = order++; } } } #if CORENRN_DEBUG // every level starts out with no race conditions involving both // parent and child in the same level. Can we arrange things so that // every level has at least 32 nodes? 
static size_t g32(TNode* nd) { return nd->nodevec_index / warpsize; } static bool is_parent_race(TNode* nd) { // vitiating size_t pg = g32(nd); for (const auto& child: nd->children) { if (pg == g32(child)) { return true; } } return false; } #endif // less than 32 apart static bool is_parent_race2(TNode* nd) { // vitiating size_t pi = nd->nodevec_index; for (const auto& child: nd->children) { if (child->nodevec_index - pi < warpsize) { return true; } } return false; } #if CORENRN_DEBUG static bool is_child_race(TNode* nd) { // potentially handleable by atomic if (nd->children.size() < 2) { return false; } if (nd->children.size() == 2) { return g32(nd->children[0]) == g32(nd->children[1]); } std::set s; for (const auto& child: nd->children) { std::size_t gc = g32(child); if (s.find(gc) != s.end()) { return true; } s.insert(gc); } return false; } #endif static bool is_child_race2(TNode* nd) { // potentially handleable by atomic if (nd->children.size() < 2) { return false; } if (nd->children.size() == 2) { size_t c0 = nd->children[0]->nodevec_index; size_t c1 = nd->children[1]->nodevec_index; c0 = (c0 < c1) ? 
(c1 - c0) : (c0 - c1); return c0 < warpsize; } size_t ic0 = nd->children[0]->nodevec_index; for (size_t i = 1; i < nd->children.size(); ++i) { size_t ic = nd->children[i]->nodevec_index; if (ic - ic0 < warpsize) { return true; } ic0 = ic; } return false; } size_t dist2child(TNode* nd) { size_t d = 1000; size_t pi = nd->nodevec_index; for (const auto& child: nd->children) { std::size_t d1 = child->nodevec_index - pi; if (d1 < d) { d = d1; } } return d; } // from stackoverflow.com template static void move_range(size_t start, size_t length, size_t dst, std::vector& v) { typename std::vector::iterator first, middle, last; if (start < dst) { first = v.begin() + start; middle = first + length; last = v.begin() + dst; } else { first = v.begin() + dst; middle = v.begin() + start; last = middle + length; } std::rotate(first, middle, last); } static void move_nodes(size_t start, size_t length, size_t dst, VTN& nodes) { nrn_assert(dst <= nodes.size()); nrn_assert(start + length <= dst); move_range(start, length, dst, nodes); // check correctness of move for (size_t i = start; i < dst - length; ++i) { nrn_assert(nodes[i]->nodevec_index == i + length); } for (size_t i = dst - length; i < dst; ++i) { nrn_assert(nodes[i]->nodevec_index == start + (i - (dst - length))); } // update nodevec_index for (size_t i = start; i < dst; ++i) { nodes[i]->nodevec_index = i; } } #if CORENRN_DEBUG // least number of nodes to move after nd to eliminate prace static size_t need2move(TNode* nd) { size_t d = dist2child(nd); return warpsize - ((nd->nodevec_index % warpsize) + d); } static void how_many_warpsize_groups_have_only_leaves(VTN& nodes) { size_t n = 0; for (size_t i = 0; i < nodes.size(); i += warpsize) { bool r = true; for (size_t j = 0; j < warpsize; ++j) { if (!nodes[i + j]->children.empty()) { r = false; break; } } if (r) { printf("warpsize group %ld starting at level %ld\n", i / warpsize, nodes[i]->level); ++n; } } printf("number of warpsize groups with only leaves = %ld\n", n); } 
static void pr_race_situation(VTN& nodes) { size_t prace2 = 0; size_t prace = 0; size_t crace = 0; for (size_t i = nodes.size() - 1; nodes[i]->level != 0; --i) { TNode* nd = nodes[i]; if (is_parent_race2(nd)) { ++prace2; } if (is_parent_race(nd)) { printf("level=%ld i=%ld d=%ld n=%ld", nd->level, nd->nodevec_index, dist2child(nd), need2move(nd)); for (const auto& cnd: nd->children) { printf(" %ld %ld", cnd->level, cnd->nodevec_index); } printf("\n"); ++prace; } if (is_child_race(nd)) { ++crace; } } printf("prace=%ld crace=%ld prace2=%ld\n", prace, crace, prace2); } #endif static size_t next_leaf(TNode* nd, VTN& nodes) { size_t i = 0; for (i = nd->nodevec_index - 1; i > 0; --i) { if (nodes[i]->children.empty()) { return i; } } // nrn_assert(i > 0); return 0; } static void checkrace(TNode* nd, VTN& nodes) { for (size_t i = nd->nodevec_index; i < nodes.size(); ++i) { if (is_parent_race2(nodes[i])) { // printf("checkrace %ld\n", i); } } } static bool eliminate_race(TNode* nd, size_t d, VTN& nodes, TNode* look) { // printf("eliminate_race %ld %ld\n", nd->nodevec_index, d); // opportunistically move that number of leaves // error if no leaves left to move. size_t i = look->nodevec_index; while (d > 0) { i = next_leaf(nodes[i], nodes); if (i == 0) { return false; } size_t n = 1; while (nodes[i - 1]->children.empty() && n < d) { --i; ++n; } // printf(" move_nodes src=%ld len=%ld dest=%ld\n", i, n, nd->nodevec_index); move_nodes(i, n, nd->nodevec_index + 1, nodes); d -= n; } checkrace(nd, nodes); return true; } static void eliminate_prace(TNode* nd, VTN& nodes) { size_t d = warpsize - dist2child(nd); bool b = eliminate_race(nd, d, nodes, nd); if (0 && !b) { printf("could not eliminate prace for g=%ld c=%ld l=%ld o=%ld %ld\n", nd->groupindex, nd->cellindex, nd->level, nd->treenode_order, nd->hash); } } static void eliminate_crace(TNode* nd, VTN& nodes) { size_t c0 = nd->children[0]->nodevec_index; size_t c1 = nd->children[1]->nodevec_index; size_t d = warpsize - ((c0 > c1) ? 
(c0 - c1) : (c1 - c0)); TNode* cnd = nd->children[0]; bool b = eliminate_race(cnd, d, nodes, nd); if (0 && !b) { printf("could not eliminate crace for g=%ld c=%ld l=%ld o=%ld %ld\n", nd->groupindex, nd->cellindex, nd->level, nd->treenode_order, nd->hash); } } static void question2(VVTN& levels) { // number of compartments in the group std::size_t nnode = std::accumulate(levels.begin(), levels.end(), 0, [](std::size_t s, const VTN& l) { return s + l.size(); }); VTN nodes(nnode); // store the sorted nodes from analyze function nnode = 0; for (const auto& level: levels) { for (const auto& l: level) { nodes[nnode++] = l; } } for (size_t i = 0; i < nodes.size(); ++i) { nodes[i]->nodevec_index = i; } // how_many_warpsize_groups_have_only_leaves(nodes); // Here we need to make sure that the dependent nodes // belong to separate warps // work backward and check the distance from parent to children. // if parent in different group (warp?) then there is no vitiating race. // if children in different group (warp?) then ther is no race (satisfied by // atomic). // If there is a vitiating race, then figure out how many nodes // need to be inserted just before the parent to avoid the race. // It is not clear if we should prioritize safe nodes (when moved they // do not introduce a race) and/or contiguous nodes (probably, to keep // the low hanging fruit together). // At least, moved nodes should have proper tree order and not themselves // introduce a race at their new location. Leaves are nice in that there // are no restrictions in movement toward higher indices. // Note that unless groups of 32 are inserted, it may be the case that // races are generated at greater indices since otherwise a portion of // each group is placed into the next group. This would not be an issue // if, in fact, the stronger requirement of every parent having // pi (parent index) + 32 <= ci (child index) is demanded instead of merely being in different // warpsize. 
One nice thing about adding warpsize nodes is that it does not disturb any // existing contiguous groups except the moved group which gets divided between parent // warpsize and child, where the nodes past the parent get same relative indices in the next // warpsize // let's see how well we can do by opportunistically moving leaves to // separate parents from children by warpsize (ie is_parent_prace2 is false) // Hopefully, we won't run out of leaves before eliminating all // is_parent_prace2 if (0 && nodes.size() % warpsize != 0) { size_t nnode = nodes.size() - levels[0].size(); printf("warp of %ld cells has %ld nodes in last cycle %ld\n", levels[0].size(), nnode % warpsize, nnode / warpsize + 1); } // pr_race_situation(nodes); // eliminate parent and children races using leaves // traverse all the children (no roots) for (size_t i = nodes.size() - 1; i >= levels[0].size(); --i) { TNode* nd = nodes[i]; if (is_child_race2(nd)) { eliminate_crace(nd, nodes); i = nd->nodevec_index; } if (is_parent_race2(nd)) { eliminate_prace(nd, nodes); i = nd->nodevec_index; } } // copy nodes indices to treenode_order for (size_t i = 0; i < nodes.size(); ++i) { nodes[i]->treenode_order = i; } } // analyze each group of cells // the cells are grouped based on warp balance (lpt) algorithm static void analyze(VVTN& levels) { // sort each level with respect to parent level order // earliest parent level first. // treenode order can be anything as long as first children < second // children etc.. 
After sorting a level, the order will be correct for // that level, ranging from [0:level.size] for (auto& level: levels) { chklevel(level); // does nothing for (const auto& nd: level) { for (size_t k = 0; k < nd->children.size(); ++k) { nd->children[k]->treenode_order = k; } } } for (auto& level: levels) { sortlevel(level); chklevel(level); // does nothing } set_treenode_order(levels); } void prgroupsize(VVVTN& groups) { #if CORENRN_DEBUG for (size_t i = 0; i < groups[0].size(); ++i) { printf("%5ld\n", i); for (const auto& group: groups) { printf(" %5ld", group[i].size()); } printf("\n"); } #endif } // group index primary, treenode_order secondary static bool final_nodevec_cmp(TNode* a, TNode* b) { bool result = false; if (a->groupindex < b->groupindex) { result = true; } else if (a->groupindex == b->groupindex) { if (a->treenode_order < b->treenode_order) { result = true; } } return result; } static void set_nodeindex(VecTNode& nodevec) { for (size_t i = 0; i < nodevec.size(); ++i) { nodevec[i]->nodevec_index = i; } } void group_order2(VecTNode& nodevec, size_t groupsize, size_t ncell) { size_t maxlevel = level_from_root(nodevec); // reset TNode.groupindex size_t nwarp = warp_balance(ncell, nodevec); // work on a cellgroup as a vector of levels. ie only possible race is // two children in same warpsize // every warp deals with a group of cells // the cell dispatching to the available groups is done through the warp_balance function (lpt // algo) VVVTN groups(nwarp ? nwarp : (ncell / groupsize + ((ncell % groupsize) ? 
1 : 0))); for (auto& group: groups) { group.resize(maxlevel + 1); } // group the cells according to their groupindex and according to their level (see // level_from_root) for (const auto& nd: nodevec) { groups[nd->groupindex][nd->level].push_back(nd); } prgroupsize(groups); // debugging // deal with each group for (auto& group: groups) { analyze(group); question2(group); } // final nodevec order according to group_index and treenode_order std::sort(nodevec.begin() + ncell, nodevec.end(), final_nodevec_cmp); set_nodeindex(nodevec); } } // namespace coreneuron ================================================ FILE: coreneuron/permute/data_layout.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/coreneuron.hpp" #include "coreneuron/permute/data_layout.hpp" #include "coreneuron/mechanism/mechanism.hpp" #include "coreneuron/permute/node_permute.h" #include "coreneuron/mechanism/membfunc.hpp" namespace coreneuron { /* * Return the index to mechanism variable based Original input files are organized in AoS */ int get_data_index(int node_index, int variable_index, int mtype, Memb_list* ml) { int layout = corenrn.get_mech_data_layout()[mtype]; nrn_assert(layout == SOA_LAYOUT); return variable_index * ml->_nodecount_padded + node_index; } } // namespace coreneuron ================================================ FILE: coreneuron/permute/data_layout.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #pragma once #define SOA_LAYOUT 0 #define AOS_LAYOUT 1 namespace coreneuron { struct Memb_list; int get_data_index(int node_index, int variable_index, int mtype, Memb_list* ml); } // namespace coreneuron ================================================ FILE: coreneuron/permute/node_permute.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ /* Below, the sense of permutation, is reversed. Though consistent, forward permutation should be defined as (and the code should eventually transformed) so that v: original vector p: forward permutation pv: permuted vector pv[i] = v[p[i]] and pinv: inverse permutation pv[pinv[i]] = v[i] Note: pinv[p[i]] = i = p[pinv[i]] */ /* Permute nodes. To make gaussian elimination on gpu more efficient. Permutation vector p[i] applied to a data vector, moves the data_original[i] to data[p[i]]. That suffices for node properties such as area[i], a[i], b[i]. e.g. area[p[i]] <- area_original[i] Notice that p on the left side is a forward permutation. On the right side it serves as the inverse permutation. area_original[i] <- area_permuted[p[i]] but things get a bit more complicated when the data is an integer index into the original data. For example: parent[i] needs to be transformed so that parent[p[i]] <- p[parent_original[i]] except that if parent_original[j] = -1 then parent[p[j]] = -1 membrane mechanism nodelist ( a subset of nodes) needs to be at least minimally transformed so that nodelist_new[k] <- p[nodelist_original[k]] This does not affect the order of the membrane mechanism property data. 
However, computation is more efficient if nodelist_new is permuted (sorted)
so that it follows as much as possible the permuted node ordering, ie in
increasing node order. Consider this further mechanism specific nodelist
permutation, which is to be applied to the above nodelist_new, to be p_m,
which has the same size as nodelist. ie.
  nodelist[p_m[k]] <- nodelist_new[k].

Notice the similarity to the parent case...
  nodelist[p_m[k]] = p[nodelist_original[k]]

and now the membrane mechanism node data does need to be permuted to have an
order consistent with the new nodelist. Since there are nm instances of the
mechanism, each with sz data values (consider AoS layout), the data
permutation is
  for k=[0:nm] for isz=[0:sz]
    data_m[p_m[k]*sz + isz] = data_m_original[k*sz + isz]

For an SoA layout the indexing is k + isz*nm (where nm may include padding).

A more complicated case is a mechanism's dparam array (nm instances each with
dsz values). Some of those values are indices into another mechanism (eg
pointers to ion properties) or voltage or area depending on the semantics of
the value. We can use the above data_m permutation but then need to update
the values according to the permutation of the object the value indexes into.
Consider the permutation of the target object to be p_t .
Then a value iold = pdata_m(k, isz) - data_t in AoS format refers to k_t = iold % sz_t and isz_t = iold - k_t*sz_t and for a target in SoA format isz_t = iold % nm_t and k_t = iold - isz_t*nm_t ie k_t_new = p_m_t[k_t] so, for AoS, inew = k_t_new*sz_t + isz_t or , for SoA, inew = k_t_new + isz_t*nm_t so pdata_m(k, isz) = inew + data_t */ #include #include #include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/io/nrn_setup.hpp" #include "coreneuron/nrniv/nrniv_decl.h" #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/coreneuron.hpp" namespace coreneuron { template void permute(T* data, int cnt, int sz, int layout, int* p) { // data(p[icnt], isz) <- data(icnt, isz) // this does not change data, merely permutes it. // assert len(p) == cnt if (!p) { return; } int n = cnt * sz; if (n < 1) { return; } if (layout == Layout::SoA) { // for SoA, n might be larger due to cnt padding n = nrn_soa_padded_size(cnt, layout) * sz; } T* data_orig = new T[n]; for (int i = 0; i < n; ++i) { data_orig[i] = data[i]; } for (int icnt = 0; icnt < cnt; ++icnt) { for (int isz = 0; isz < sz; ++isz) { // note that when layout==0, nrn_i_layout takes into account SoA padding. int i = nrn_i_layout(icnt, cnt, isz, sz, layout); int ip = nrn_i_layout(p[icnt], cnt, isz, sz, layout); data[ip] = data_orig[i]; } } delete[] data_orig; } int* inverse_permute(int* p, int n) { int* pinv = new int[n]; for (int i = 0; i < n; ++i) { pinv[p[i]] = i; } return pinv; } static void invert_permute(int* p, int n) { int* pinv = inverse_permute(p, n); for (int i = 0; i < n; ++i) { p[i] = pinv[i]; } delete[] pinv; } // type_of_ntdata: Return the mechanism type (or voltage) for nt._data[i]. // Used for updating POINTER. Analogous to nrn_dblpntr2nrncore in NEURON. // To reduce search time, consider voltage first, then a few of the previous // search results. // type_hint first and store a few // of the previous search result types to try next. // Most usage is for voltage. 
Most of the rest is likely for a specific type. // Occasionally, eg. axial current, there are two types oscillationg between // a SUFFIX (for non-zero area node) and POINT_PROCESS (for zero area nodes) // version // full_search: helper for type_of_ntdata. Return mech type for nt._data[i]. // Update type_hints. static std::vector type_hints; static int full_search(NrnThread& nt, double* pd) { int type = -1; for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) { Memb_list* ml = tml->ml; int n = corenrn.get_prop_param_size()[tml->index] * ml->_nodecount_padded; if (pd >= ml->data && pd < ml->data + n) { type = tml->index; // insert into type_hints int i = 0; for (int type_hint: type_hints) { if (type < type_hint) { break; } i++; } type_hints.insert(type_hints.begin() + i, type); break; } } assert(type > 0); return type; } // no longer static because also used by POINTER in nrn_checkpoint.cpp int type_of_ntdata(NrnThread& nt, int i, bool reset) { double* pd = nt._data + i; assert(pd >= nt._actual_v); if (pd < nt._actual_area) { // voltage first (area just after voltage) return voltage; } assert(size_t(i) < nt._ndata); // then check the type hints. 
When inserting a hint, keep in type order if (reset) { type_hints.clear(); } for (int type: type_hints) { Memb_list* ml = nt._ml_list[type]; if (pd >= ml->data) { // this or later int n = corenrn.get_prop_param_size()[type] * ml->_nodecount_padded; if (pd < ml->data + n) { // this is the one return type; } } else { // earlier return full_search(nt, pd); } } // after the last type_hints return full_search(nt, pd); } static void update_pdata_values(Memb_list* ml, int type, NrnThread& nt) { // assumes AoS to SoA transformation already made since we are using // nrn_i_layout to determine indices into both ml->pdata and into target data int psz = corenrn.get_prop_dparam_size()[type]; if (psz == 0) { return; } if (corenrn.get_is_artificial()[type]) { return; } int* semantics = corenrn.get_memb_func(type).dparam_semantics; if (!semantics) { return; } int* pdata = ml->pdata; int layout = corenrn.get_mech_data_layout()[type]; int cnt = ml->nodecount; // ml padding does not matter (but target padding does matter) // interesting semantics are -1 (area), -5 (pointer), -9 (diam), or 0-999 (ion variables) for (int i = 0; i < psz; ++i) { int s = semantics[i]; if (s == -1) { // area int area0 = nt._actual_area - nt._data; // includes padding if relevant int* p_target = nt._permute; for (int iml = 0; iml < cnt; ++iml) { int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout); // *pd is the original integer into nt._data . Needs to be replaced // by the permuted value // This is ok whether or not area changed by padding? // since old *pd updated appropriately by earlier AoS to SoA // transformation int ix = *pd - area0; // original integer into area array. 
nrn_assert((ix >= 0) && (ix < nt.end)); int ixnew = p_target[ix]; *pd = ixnew + area0; } } else if (s == -9) { // diam int diam0 = nt._actual_diam - nt._data; // includes padding if relevant int* p_target = nt._permute; for (int iml = 0; iml < cnt; ++iml) { int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout); // *pd is the original integer into nt._data . Needs to be replaced // by the permuted value // This is ok whether or not diam changed by padding? // since old *pd updated appropriately by earlier AoS to SoA // transformation int ix = *pd - diam0; // original integer into actual_diam array. nrn_assert((ix >= 0) && (ix < nt.end)); int ixnew = p_target[ix]; *pd = ixnew + diam0; } } else if (s == -5) { // POINTER // assume pointer into nt._data. Most likely voltage. // If not voltage, most likely same mechanism for all indices. for (int iml = 0; iml < cnt; ++iml) { int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout); int etype = type_of_ntdata(nt, *pd, iml == 0); if (etype == voltage) { int v0 = nt._actual_v - nt._data; int* e_target = nt._permute; int ix = *pd - v0; // original integer into area array. nrn_assert((ix >= 0) && (ix < nt.end)); int ixnew = e_target[ix]; *pd = ixnew + v0; } else if (etype > 0) { // about same as for ion below but check each instance Memb_list* eml = nt._ml_list[etype]; int edata0 = eml->data - nt._data; int ecnt = eml->nodecount; int esz = corenrn.get_prop_param_size()[etype]; int elayout = corenrn.get_mech_data_layout()[etype]; int* e_permute = eml->_permute; int i_ecnt, i_esz, padded_ecnt; int ix = *pd - edata0; if (elayout == Layout::AoS) { padded_ecnt = ecnt; i_ecnt = ix / esz; i_esz = ix % esz; } else { // SoA assert(elayout == Layout::SoA); padded_ecnt = nrn_soa_padded_size(ecnt, elayout); i_ecnt = ix % padded_ecnt; i_esz = ix / padded_ecnt; } int i_ecnt_new = e_permute ? 
e_permute[i_ecnt] : i_ecnt; int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout); *pd = ix_new + edata0; } else { nrn_assert(0); } } } else if (s >= 0 && s < 1000) { // ion int etype = s; int elayout = corenrn.get_mech_data_layout()[etype]; Memb_list* eml = nt._ml_list[etype]; int edata0 = eml->data - nt._data; int ecnt = eml->nodecount; int esz = corenrn.get_prop_param_size()[etype]; int* e_permute = eml->_permute; for (int iml = 0; iml < cnt; ++iml) { int* pd = pdata + nrn_i_layout(iml, cnt, i, psz, layout); int ix = *pd - edata0; // from ix determine i_ecnt and i_esz (need to permute i_ecnt) int i_ecnt, i_esz, padded_ecnt; if (elayout == Layout::AoS) { padded_ecnt = ecnt; i_ecnt = ix / esz; i_esz = ix % esz; } else { // SoA assert(elayout == Layout::SoA); padded_ecnt = nrn_soa_padded_size(ecnt, elayout); i_ecnt = ix % padded_ecnt; i_esz = ix / padded_ecnt; } int i_ecnt_new = e_permute[i_ecnt]; int ix_new = nrn_i_layout(i_ecnt_new, ecnt, i_esz, esz, elayout); *pd = ix_new + edata0; } } } } void node_permute(int* vec, int n, int* permute) { for (int i = 0; i < n; ++i) { if (vec[i] >= 0) { vec[i] = permute[vec[i]]; } } } void permute_ptr(int* vec, int n, int* p) { permute(vec, n, 1, 1, p); } void permute_data(double* vec, int n, int* p) { permute(vec, n, 1, 1, p); } void permute_ml(Memb_list* ml, int type, NrnThread& nt) { int sz = corenrn.get_prop_param_size()[type]; int psz = corenrn.get_prop_dparam_size()[type]; int layout = corenrn.get_mech_data_layout()[type]; permute(ml->data, ml->nodecount, sz, layout, ml->_permute); permute(ml->pdata, ml->nodecount, psz, layout, ml->_permute); update_pdata_values(ml, type, nt); } int nrn_index_permute(int ix, int type, Memb_list* ml) { int* p = ml->_permute; if (!p) { return ix; } int layout = corenrn.get_mech_data_layout()[type]; if (layout == Layout::AoS) { int sz = corenrn.get_prop_param_size()[type]; int i_cnt = ix / sz; int i_sz = ix % sz; return p[i_cnt] * sz + i_sz; } else { assert(layout == Layout::SoA); 
int padded_cnt = nrn_soa_padded_size(ml->nodecount, layout); int i_cnt = ix % padded_cnt; int i_sz = ix / padded_cnt; return i_sz * padded_cnt + p[i_cnt]; } } #if CORENRN_DEBUG static void pr(const char* s, int* x, int n) { printf("%s:", s); for (int i = 0; i < n; ++i) { printf(" %d %d", i, x[i]); } printf("\n"); } static void pr(const char* s, double* x, int n) { printf("%s:", s); for (int i = 0; i < n; ++i) { printf(" %d %g", i, x[i]); } printf("\n"); } #endif // note that sort_indices has the sense of an inverse permutation in that // the value of sort_indices[0] is the index with the smallest value in the // indices array static bool nrn_index_sort_cmp(const std::pair& a, const std::pair& b) { bool result = false; if (a.first < b.first) { result = true; } else if (a.first == b.first) { if (a.second < b.second) { result = true; } } return result; } static int* nrn_index_sort(int* values, int n) { std::vector> vi(n); for (int i = 0; i < n; ++i) { vi[i].first = values[i]; vi[i].second = i; } std::sort(vi.begin(), vi.end(), nrn_index_sort_cmp); int* sort_indices = new int[n]; for (int i = 0; i < n; ++i) { sort_indices[i] = vi[i].second; } return sort_indices; } void permute_nodeindices(Memb_list* ml, int* p) { // nodeindices values are permuted according to p (that per se does // not affect vec). node_permute(ml->nodeindices, ml->nodecount, p); // Then the new node indices are sorted by // increasing index. Instances using the same node stay in same // original relative order so that their contributions to rhs, d (if any) // remain in same order (except for gpu parallelism). 
// That becomes ml->_permute ml->_permute = nrn_index_sort(ml->nodeindices, ml->nodecount); invert_permute(ml->_permute, ml->nodecount); permute_ptr(ml->nodeindices, ml->nodecount, ml->_permute); } } // namespace coreneuron ================================================ FILE: coreneuron/permute/node_permute.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/sim/multicore.hpp" namespace coreneuron { // determine ml->_permute and permute the ml->nodeindices accordingly void permute_nodeindices(Memb_list* ml, int* permute); // vec values >= 0 updated according to permutation void node_permute(int* vec, int n, int* permute); // moves values to new location but does not change those values void permute_ptr(int* vec, int n, int* permute); void permute_data(double* vec, int n, int* permute); void permute_ml(Memb_list* ml, int type, NrnThread& nt); int nrn_index_permute(int, int type, Memb_list* ml); int* inverse_permute(int* p, int n); int type_of_ntdata(NrnThread&, int index, bool reset); } // namespace coreneuron ================================================ FILE: coreneuron/sim/fadvance_core.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include "coreneuron/coreneuron.hpp" #include "coreneuron/nrnconf.h" #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/io/reports/nrnreport.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/network/netpar.hpp" #include "coreneuron/network/partrans.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/utils/progressbar/progressbar.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/io/nrn2core_direct.h" namespace coreneuron { static void* nrn_fixed_step_thread(NrnThread*); static void nrn_fixed_step_group_thread(NrnThread*, int, int, int&); namespace { class ProgressBar final { progressbar* pbar; int current_step = 0; bool show; constexpr static int progressbar_update_steps = 5; public: ProgressBar(int nsteps) : show(nrnmpi_myid == 0 && !corenrn_param.is_quiet()) { if (show) { printf("\n"); pbar = progressbar_new("psolve", nsteps); } } void update(int step, double time) { current_step = step; if (show && (current_step % progressbar_update_steps) == 0) { progressbar_update(pbar, current_step, time); } } void step(double time) { update(current_step + 1, time); } ~ProgressBar() { if (show) { progressbar_finish(pbar); } } }; } // unnamed namespace void dt2thread(double adt) { /* copied from nrnoc/fadvance.c */ if (adt != nrn_threads[0]._dt) { for (int i = 0; i < nrn_nthread; ++i) { NrnThread* nt = nrn_threads + i; nt->_t = t; nt->_dt = dt; if (secondorder) { nt->cj = 2.0 / dt; } else { nt->cj = 1.0 / dt; } nrn_pragma_acc(update device(nt->_t, nt->_dt, nt->cj) async(nt->stream_id) if (nt->compute_gpu)) // clang-format off nrn_pragma_omp(target update to(nt->_t, nt->_dt, nt->cj) if(nt->compute_gpu)) // clang-format on } } } void nrn_fixed_step_minimal() { /* not so minimal anymore with gap junctions */ 
Instrumentor::phase p_timestep("timestep"); if (t != nrn_threads->_t) { dt2thread(-1.); } else { dt2thread(dt); } nrn_thread_table_check(); nrn_multithread_job(nrn_fixed_step_thread); if (nrn_have_gaps) { { Instrumentor::phase p_gap("gap-v-transfer"); nrnmpi_v_transfer(); } nrn_multithread_job(nrn_fixed_step_lastpart); } #if NRNMPI if (nrn_threads[0]._stop_stepping) { nrn_spike_exchange(nrn_threads); } #endif #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) { Instrumentor::phase p("flush_reports"); nrn_flush_reports(nrn_threads[0]._t); } #endif t = nrn_threads[0]._t; } /* better cache efficiency since a thread can do an entire minimum delay integration interval before joining */ /// --> Coreneuron void nrn_fixed_single_steps_minimal(int total_sim_steps, double tstop) { ProgressBar progress_bar(total_sim_steps); #if NRNMPI double updated_tstop = tstop - dt; nrn_assert(nrn_threads->_t <= tstop); // It may very well be the case that we do not advance at all while (nrn_threads->_t <= updated_tstop) { #else double updated_tstop = tstop - .5 * dt; while (nrn_threads->_t < updated_tstop) { #endif nrn_fixed_step_minimal(); if (stoprun) { break; } progress_bar.step(nrn_threads[0]._t); } } void nrn_fixed_step_group_minimal(int total_sim_steps) { dt2thread(dt); nrn_thread_table_check(); int step_group_n = total_sim_steps; int step_group_begin = 0; int step_group_end = 0; ProgressBar progress_bar(step_group_n); while (step_group_end < step_group_n) { nrn_multithread_job(nrn_fixed_step_group_thread, step_group_n, step_group_begin, step_group_end); #if NRNMPI nrn_spike_exchange(nrn_threads); #endif #if defined(ENABLE_BIN_REPORTS) || defined(ENABLE_SONATA_REPORTS) { Instrumentor::phase p("flush_reports"); nrn_flush_reports(nrn_threads[0]._t); } #endif if (stoprun) { break; } step_group_begin = step_group_end; progress_bar.update(step_group_end, nrn_threads[0]._t); } t = nrn_threads[0]._t; } static void nrn_fixed_step_group_thread(NrnThread* nth, int 
step_group_max, int step_group_begin, int& step_group_end) { nth->_stop_stepping = 0; for (int i = step_group_begin; i < step_group_max; ++i) { Instrumentor::phase p_timestep("timestep"); nrn_fixed_step_thread(nth); if (nth->_stop_stepping) { if (nth->id == 0) { step_group_end = i + 1; } nth->_stop_stepping = 0; return; } } if (nth->id == 0) { step_group_end = step_group_max; } } void update(NrnThread* _nt) { double* vec_v = &(VEC_V(0)); double* vec_rhs = &(VEC_RHS(0)); int i2 = _nt->end; /* do not need to worry about linmod or extracellular*/ if (secondorder) { nrn_pragma_acc(parallel loop present(vec_v [0:i2], vec_rhs [0:i2]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu)) for (int i = 0; i < i2; ++i) { vec_v[i] += 2. * vec_rhs[i]; } } else { nrn_pragma_acc(parallel loop present(vec_v [0:i2], vec_rhs [0:i2]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu)) for (int i = 0; i < i2; ++i) { vec_v[i] += vec_rhs[i]; } } if (_nt->tml) { assert(_nt->tml->index == CAP); nrn_cur_capacitance(_nt, _nt->tml->ml, _nt->tml->index); } if (nrn_use_fast_imem) { nrn_calc_fast_imem(_nt); } } void nonvint(NrnThread* _nt) { if (nrn_have_gaps) { Instrumentor::phase p("gap-v-transfer"); nrnthread_v_transfer(_nt); } errno = 0; Instrumentor::phase_begin("state-update"); for (auto tml = _nt->tml; tml; tml = tml->next) if (corenrn.get_memb_func(tml->index).state) { mod_f_t s = corenrn.get_memb_func(tml->index).state; std::string ss("state-"); ss += nrn_get_mechname(tml->index); { Instrumentor::phase p(ss.c_str()); (*s)(_nt, tml->ml, tml->index); } #ifdef DEBUG if (errno) { hoc_warning("errno set during calculation of states", nullptr); } #endif } Instrumentor::phase_end("state-update"); } void nrn_ba(NrnThread* nt, int bat) { for (auto tbl = nt->tbl[bat]; tbl; tbl = tbl->next) { mod_f_t f = tbl->bam->f; int type = tbl->bam->type; Memb_list* 
ml = tbl->ml; (*f)(nt, ml, type); } } void nrncore2nrn_send_init() { if (nrn2core_trajectory_values_ == nullptr) { // standalone execution : no callbacks return; } // if per time step transfer, need to call nrn_record_init() in NEURON. // if storing full trajectories in CoreNEURON, need to initialize // vsize for all the trajectory requests. (*nrn2core_trajectory_values_)(-1, 0, nullptr, 0.0); for (int tid = 0; tid < nrn_nthread; ++tid) { NrnThread& nt = nrn_threads[tid]; if (nt.trajec_requests) { nt.trajec_requests->vsize = 0; } } } void nrncore2nrn_send_values(NrnThread* nth) { if (nrn2core_trajectory_values_ == nullptr) { // standalone execution : no callbacks return; } TrajectoryRequests* tr = nth->trajec_requests; if (tr) { if (tr->varrays) { // full trajectories into Vector data int vs = tr->vsize++; // make sure we do not overflow the `varrays` buffers assert(vs < tr->bsize); nrn_pragma_acc(parallel loop present(tr [0:1]) if (nth->compute_gpu) async(nth->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nth->compute_gpu)) for (int i = 0; i < tr->n_trajec; ++i) { tr->varrays[i][vs] = *tr->gather[i]; } } else if (tr->scatter) { // scatter to NEURON and notify each step. nrn_assert(nrn2core_trajectory_values_); // Note that this is rather inefficient: we generate one `acc update // self` call for each `double` value (voltage, membrane current, // mechanism property, ...) that is being recorded, even though in most // cases these values will actually fall in a small number of contiguous // ranges in memory. A better solution, if the performance of this // branch becomes limiting, might be to offload this loop to the // device and populate some `scatter_values` array there and copy it // back with a single transfer. Note that the `async` clause here // should guarantee that correct values are reported even of // mechanism data that is updated in `nrn_state`. 
See also: // https://github.com/BlueBrain/CoreNeuron/issues/611 for (int i = 0; i < tr->n_trajec; ++i) { double* gather_i = tr->gather[i]; static_cast(gather_i); nrn_pragma_acc(update self(gather_i [0:1]) if (nth->compute_gpu) async(nth->stream_id)) nrn_pragma_omp(target update from(gather_i [0:1]) if (nth->compute_gpu)) } nrn_pragma_acc(wait(nth->stream_id)) for (int i = 0; i < tr->n_trajec; ++i) { *(tr->scatter[i]) = *(tr->gather[i]); } (*nrn2core_trajectory_values_)(nth->id, tr->n_pr, tr->vpr, nth->_t); } } } static void* nrn_fixed_step_thread(NrnThread* nth) { /* check thresholds and deliver all (including binqueue) events up to t+dt/2 */ { Instrumentor::phase p("deliver-events"); deliver_net_events(nth); } nth->_t += .5 * nth->_dt; if (nth->ncell) { /*@todo: do we need to update nth->_t on GPU: Yes (Michael, but can launch kernel) */ nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->stream_id)) nrn_pragma_acc(wait(nth->stream_id)) nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu)) fixed_play_continuous(nth); { Instrumentor::phase p("setup-tree-matrix"); setup_tree_matrix_minimal(nth); } { Instrumentor::phase p("matrix-solver"); nrn_solve_minimal(nth); } { Instrumentor::phase p("second-order-cur"); second_order_cur(nth, secondorder); } { Instrumentor::phase p("update"); update(nth); } } if (!nrn_have_gaps) { nrn_fixed_step_lastpart(nth); } return nullptr; } void* nrn_fixed_step_lastpart(NrnThread* nth) { nth->_t += .5 * nth->_dt; if (nth->ncell) { /*@todo: do we need to update nth->_t on GPU */ nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->stream_id)) nrn_pragma_acc(wait(nth->stream_id)) nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu)) fixed_play_continuous(nth); nonvint(nth); nrn_ba(nth, AFTER_SOLVE); nrn_ba(nth, BEFORE_STEP); nrncore2nrn_send_values(nth); // consistent with NEURON. 
(after BEFORE_STEP) } else { nrncore2nrn_send_values(nth); } { Instrumentor::phase p("deliver-events"); nrn_deliver_events(nth); /* up to but not past texit */ } return nullptr; } } // namespace coreneuron ================================================ FILE: coreneuron/sim/fast_imem.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include "coreneuron/nrnconf.h" #include "coreneuron/sim/fast_imem.hpp" #include "coreneuron/utils/memory.h" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/utils/nrnoc_aux.hpp" namespace coreneuron { extern int nrn_nthread; extern NrnThread* nrn_threads; bool nrn_use_fast_imem; void fast_imem_free() { for (auto nt = nrn_threads; nt < nrn_threads + nrn_nthread; ++nt) { if (nt->nrn_fast_imem) { free_memory(nt->nrn_fast_imem->nrn_sav_rhs); free_memory(nt->nrn_fast_imem->nrn_sav_d); free_memory(nt->nrn_fast_imem); nt->nrn_fast_imem = nullptr; } } } void nrn_fast_imem_alloc() { if (nrn_use_fast_imem) { fast_imem_free(); for (auto nt = nrn_threads; nt < nrn_threads + nrn_nthread; ++nt) { int n = nt->end; nt->nrn_fast_imem = (NrnFastImem*) ecalloc_align(1, sizeof(NrnFastImem)); nt->nrn_fast_imem->nrn_sav_rhs = (double*) ecalloc_align(n, sizeof(double)); nt->nrn_fast_imem->nrn_sav_d = (double*) ecalloc_align(n, sizeof(double)); } } } void nrn_calc_fast_imem(NrnThread* nt) { int i1 = 0; int i3 = nt->end; double* vec_rhs = nt->_actual_rhs; double* vec_area = nt->_actual_area; double* fast_imem_d = nt->nrn_fast_imem->nrn_sav_d; double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs; nrn_pragma_acc( parallel loop present(vec_rhs, vec_area, fast_imem_d, fast_imem_rhs) if (nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd 
if(nt->compute_gpu)) for (int i = i1; i < i3; ++i) { fast_imem_rhs[i] = (fast_imem_d[i] * vec_rhs[i] + fast_imem_rhs[i]) * vec_area[i] * 0.01; } } void nrn_calc_fast_imem_init(NrnThread* nt) { // See the corresponding NEURON nrn_calc_fast_imem_fixedstep_init int i1 = 0; int i3 = nt->end; double* vec_rhs = nt->_actual_rhs; double* vec_area = nt->_actual_area; double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs; nrn_pragma_acc(parallel loop present(vec_rhs, vec_area, fast_imem_rhs) if (nt->compute_gpu) async(nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu)) for (int i = i1; i < i3; ++i) { fast_imem_rhs[i] = (vec_rhs[i] + fast_imem_rhs[i]) * vec_area[i] * 0.01; } } } // namespace coreneuron ================================================ FILE: coreneuron/sim/fast_imem.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/sim/multicore.hpp" namespace coreneuron { /* Bool global variable to define if the fast_imem * calculations should be enabled. */ extern bool nrn_use_fast_imem; /* Free memory allocated for the fast current membrane calculation. * Found in src/nrnoc/multicore.c in NEURON. */ void fast_imem_free(); /* fast_imem_alloc() wrapper. * Found in src/nrnoc/multicore.c in NEURON. */ void nrn_fast_imem_alloc(); /* Calculate the new values of rhs array at every timestep. * Found in src/nrnoc/fadvance.cpp in NEURON. */ void nrn_calc_fast_imem(NrnThread* _nt); /* Initialization used only in offline (file) mode. 
* See NEURON nrn_calc_fast_imem_fixedstep_init in src/nrnoc/fadvance.cpp
 */
void nrn_calc_fast_imem_init(NrnThread* _nt);

}  // namespace coreneuron

================================================
FILE: coreneuron/sim/finitialize.cpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/

#include "coreneuron/nrnconf.h"
#include "coreneuron/network/netpar.hpp"
#include "coreneuron/network/netcvode.hpp"
#include "coreneuron/sim/fast_imem.hpp"
#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/utils/profile/profiler_interface.h"
#include "coreneuron/coreneuron.hpp"

namespace coreneuron {

/* Flag that is true only while allocate_data_in_mechanism_nrn_init() runs. */
bool _nrn_skip_initmodel;

/**
 * Call the `initialize` function of every mechanism of every thread while
 * `_nrn_skip_initmodel` is set to true, then reset the flag to false.
 */
void allocate_data_in_mechanism_nrn_init() {
    // In case some nrn_init allocates data that we need. In this case
    // we want to call nrn_init but not execute initmodel i.e. INITIAL
    // block. For this, set _nrn_skip_initmodel to true temporarily,
    // execute nrn_init and return.
    _nrn_skip_initmodel = true;
    for (int i = 0; i < nrn_nthread; ++i) {  // could be parallel
        NrnThread& nt = nrn_threads[i];
        for (NrnThreadMembList* tml = nt.tml; tml; tml = tml->next) {
            Memb_list* ml = tml->ml;
            mod_f_t s = corenrn.get_memb_func(tml->index).initialize;
            if (s) {
                (*s)(&nt, ml, tml->index);
            }
        }
    }
    _nrn_skip_initmodel = false;
}

/**
 * Initialize the simulation state at t = 0.
 *
 * The steps below run in a fixed order: reset time and event queue, deliver
 * the play events, optionally set all voltages, gap transfers, the
 * BEFORE_INITIAL / INITIAL / AFTER_INITIAL stages, matrix setup, BEFORE_STEP,
 * sending of the initial recorded values and a final event delivery.
 *
 * @param setv when non-zero, every node voltage of every thread is set to `v`
 * @param v    voltage assigned when `setv` is non-zero
 */
void nrn_finitialize(int setv, double v) {
    Instrumentor::phase_begin("finitialize");
    t = 0.;
    dt2thread(-1.);
    nrn_thread_table_check();
    clear_event_queue();
    nrn_spike_exchange_init();
#if VECTORIZE
    nrn_play_init(); /* Vector.play */
    /// Play events should be executed before initializing events
    for (int i = 0; i < nrn_nthread; ++i) {
        nrn_deliver_events(nrn_threads + i); /* The play events at t=0 */
    }
    if (setv) {
        for (auto _nt = nrn_threads; _nt < nrn_threads + nrn_nthread; ++_nt) {
            double* vec_v = &(VEC_V(0));
            nrn_pragma_acc(
                parallel loop present(_nt [0:1], vec_v [0:_nt->end]) if (_nt->compute_gpu))
            nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
            for (int i = 0; i < _nt->end; ++i) {
                vec_v[i] = v;
            }
        }
    }

    if (nrn_have_gaps) {
        Instrumentor::phase p("gap-v-transfer");
        nrnmpi_v_transfer();
        for (int i = 0; i < nrn_nthread; ++i) {
            nrnthread_v_transfer(nrn_threads + i);
        }
    }

    for (int i = 0; i < nrn_nthread; ++i) {
        nrn_ba(nrn_threads + i, BEFORE_INITIAL);
    }
    /* the INITIAL blocks are ordered so that mechanisms that write
       concentrations are after ions and before mechanisms that read
       concentrations.
    */
    /* the memblist list in NrnThread is already so ordered */
    for (int i = 0; i < nrn_nthread; ++i) {
        NrnThread* nt = nrn_threads + i;
        for (auto tml = nt->tml; tml; tml = tml->next) {
            mod_f_t s = corenrn.get_memb_func(tml->index).initialize;
            if (s) {
                (*s)(nt, tml->ml, tml->index);
            }
        }
    }
#endif

    init_net_events();
    for (int i = 0; i < nrn_nthread; ++i) {
        nrn_ba(nrn_threads + i, AFTER_INITIAL);
    }
    for (int i = 0; i < nrn_nthread; ++i) {
        nrn_deliver_events(nrn_threads + i); /* The INITIAL sent events at t=0 */
    }
    for (int i = 0; i < nrn_nthread; ++i) {
        setup_tree_matrix_minimal(nrn_threads + i);
        if (nrn_use_fast_imem) {
            nrn_calc_fast_imem_init(nrn_threads + i);
        }
    }
    for (int i = 0; i < nrn_nthread; ++i) {
        nrn_ba(nrn_threads + i, BEFORE_STEP);
    }
    nrncore2nrn_send_init();
    for (int i = 0; i < nrn_nthread; ++i) {
        nrncore2nrn_send_values(nrn_threads + i);
    }
    // Consistent with NEURON. BEFORE_STEP and fixed_record_continuous before nrn_deliver_events.
    for (int i = 0; i < nrn_nthread; ++i) {
        nrn_deliver_events(nrn_threads + i); /* The record events at t=0 */
    }
#if NRNMPI
    nrn_spike_exchange(nrn_threads);
#endif
    Instrumentor::phase_end("finitialize");
}
}  // namespace coreneuron

================================================
FILE: coreneuron/sim/multicore.cpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/

#include
#include

#include "coreneuron/nrnconf.h"
#include "coreneuron/sim/multicore.hpp"
#include "coreneuron/utils/memory.h"
#include "coreneuron/coreneuron.hpp"
#include "coreneuron/utils/nrnoc_aux.hpp"

/*
Now that threads have taken over the actual_v, v_node, etc, it might
be a good time to regularize the method of freeing, allocating, and
updating those arrays.
To recapitulate the history, Node used to
be the structure that held direct values for v, area, d, rhs, etc.
That continued to hold for the Cray vectorization project which
introduced v_node, v_parent, memb_list. Cache efficiency introduced
actual_v, actual_area, actual_d, etc., and the Node started pointing
into those arrays. Additional nodes after allocation required updating
pointers to v and area since those arrays were freed and reallocated.
Now, the threads hold all these arrays and we want to update them
properly under the circumstances of changing topology, changing
number of threads, and changing distribution of cells on threads.
Note there are no longer global versions of any of these arrays.
We do not want to update merely due to a change in area. Recently
we have dealt with diam, area, ri on a section basis. We generally
desire an update just before a simulation when the efficient
structures are necessary. This is reasonably well handled by the
v_structure_change flag which historically freed and reallocated
v_node and v_parent and, just before this comment,
ended up setting the NrnThread tml. This makes most of the old
memb_list vestigial and we have now gotten rid of it except for
the artificial cells (and it is possibly not really necessary there).
Switching between sparse and tree matrix just causes freeing
and reallocation of actual_rhs.

If we can get the freeing, reallocation, and pointer update correct
for _actual_v, I am guessing everything else can be dragged along with
it. We have two major cases, call to pc.nthread and change in
model structure. We want to use Node* as much as possible and defer
the handling of v_structure_change as long as possible.
*/

namespace coreneuron {

CoreNeuron corenrn;

int nrn_nthread = 0;
NrnThread* nrn_threads = nullptr;
void (*nrn_mk_transfer_thread_data_)();

/// --> CoreNeuron class
static int table_check_cnt_;
static ThreadDatum* table_check_;

/**
 * Allocate and initialize the mechanism list node for one mechanism of a thread.
 *
 * @param nt             thread that will own the node; passed to the mechanism's
 *                       private constructor, if it has one
 * @param mech_id        position of the mechanism in `mech_types` / `nodecounts`
 * @param memb_func      function table of the mechanism
 * @param shadow_rhs_cnt running maximum of the node counts of non-artificial
 *                       point processes; raised when this mechanism is larger
 * @param mech_types     mechanism type for every `mech_id`
 * @param nodecounts     number of instances for every `mech_id`
 * @return the new node; `next` and `dependencies` are null and
 *         `ndependencies` is 0
 */
NrnThreadMembList* create_tml(NrnThread& nt,
                              int mech_id,
                              Memb_func& memb_func,
                              int& shadow_rhs_cnt,
                              const std::vector<int>& mech_types,
                              const std::vector<int>& nodecounts) {
    auto tml = (NrnThreadMembList*) emalloc_align(sizeof(NrnThreadMembList), 0);
    tml->next = nullptr;
    tml->index = mech_types[mech_id];
    // Set the dependency fields explicitly, like `next`, instead of handing an
    // indeterminate pointer/count to whoever inspects the node first.
    tml->dependencies = nullptr;
    tml->ndependencies = 0;

    tml->ml = (Memb_list*) ecalloc_align(1, sizeof(Memb_list), 0);
    tml->ml->_net_receive_buffer = nullptr;
    tml->ml->_net_send_buffer = nullptr;
    tml->ml->_permute = nullptr;
    if (memb_func.alloc == nullptr) {
        hoc_execerror(memb_func.sym, "mechanism does not exist");
    }
    tml->ml->nodecount = nodecounts[mech_id];
    if (!memb_func.sym) {
        printf("%s (type %d) is not available\n", nrn_get_mechname(tml->index), tml->index);
        exit(1);
    }
    tml->ml->_nodecount_padded =
        nrn_soa_padded_size(tml->ml->nodecount, corenrn.get_mech_data_layout()[tml->index]);
    if (memb_func.is_point && corenrn.get_is_artificial()[tml->index] == 0) {
        // Avoid race for multiple PointProcess instances in same compartment.
        if (tml->ml->nodecount > shadow_rhs_cnt) {
            shadow_rhs_cnt = tml->ml->nodecount;
        }
    }

    if (auto* const priv_ctor = corenrn.get_memb_func(tml->index).private_constructor) {
        priv_ctor(&nt, tml->ml, tml->index);
    }

    return tml;
}

/**
 * Create `n` threads when `n` differs from the current thread count.
 *
 * Every new thread gets its index as `id` and an all-null `tbl` array; the
 * `v_structure_change` and `diam_changed` flags are raised afterwards.
 *
 * NOTE(review): the previous `nrn_threads` array is not released here; the
 * pointer is simply overwritten. Presumably callers use nrn_threads_free()
 * first - confirm.
 */
void nrn_threads_create(int n) {
    if (nrn_nthread != n) {
        /*printf("sizeof(NrnThread)=%d   sizeof(Memb_list)=%d\n", sizeof(NrnThread),
         * sizeof(Memb_list));*/

        nrn_threads = nullptr;
        nrn_nthread = n;
        if (n > 0) {
            nrn_threads = new NrnThread[n];
            for (int i = 0; i < nrn_nthread; ++i) {
                NrnThread& nt = nrn_threads[i];
                nt.id = i;
                for (int j = 0; j < BEFORE_AFTER_SIZE; ++j) {
                    nt.tbl[j] = nullptr;
                }
            }
        }
        v_structure_change = 1;
        diam_changed = 1;
    }
    /*printf("nrn_threads_create %d %d\n", nrn_nthread, nrn_thread_parallel_);*/
}

/** Delete the thread array and reset the thread count to zero. */
void nrn_threads_free() {
    if (nrn_nthread) {
        delete[] nrn_threads;
        nrn_threads = nullptr;
        nrn_nthread = 0;
    }
}

/**
 * Rebuild the `table_check_` array.
 *
 * For every mechanism type that provides `thread_table_check_`, the first
 * thread containing it contributes two consecutive entries: the thread id
 * and the mechanism list node of that thread.
 */
void nrn_mk_table_check() {
    if (table_check_) {
        free((void*) table_check_);
        table_check_ = nullptr;
    }
    auto& memb_func = corenrn.get_memb_funcs();
    // Allocate int array of size of mechanism types;
    // ix[type] is the id of the first thread using that type, or -1
    std::vector<int> ix(memb_func.size(), -1);
    table_check_cnt_ = 0;
    for (int id = 0; id < nrn_nthread; ++id) {
        auto& nt = nrn_threads[id];
        for (auto tml = nt.tml; tml; tml = tml->next) {
            int index = tml->index;
            if (memb_func[index].thread_table_check_ && ix[index] == -1) {
                ix[index] = id;
                table_check_cnt_ += 2;
            }
        }
    }
    if (table_check_cnt_) {
        table_check_ = (ThreadDatum*) emalloc(table_check_cnt_ * sizeof(ThreadDatum));
    }
    int i = 0;
    for (int id = 0; id < nrn_nthread; ++id) {
        auto& nt = nrn_threads[id];
        for (auto tml = nt.tml; tml; tml = tml->next) {
            int index = tml->index;
            if (memb_func[index].thread_table_check_ && ix[index] == id) {
                table_check_[i++].i = id;
                table_check_[i++]._pvoid = (void*) tml;
            }
        }
    }
}

void nrn_thread_table_check() {
    for (int i = 0; i < table_check_cnt_; i += 2) {
        auto& nt = nrn_threads[table_check_[i].i];
        auto tml = static_cast<NrnThreadMembList*>(table_check_[i + 1]._pvoid);
        Memb_list* ml = tml->ml;
(*corenrn.get_memb_func(tml->index).thread_table_check_)(
            0, ml->_nodecount_padded, ml->data, ml->pdata, ml->_thread, &nt, ml, tml->index);
    }
}
}  // namespace coreneuron

================================================
FILE: coreneuron/sim/multicore.hpp
================================================
/*
# =============================================================================
# Copyright (c) 2016 - 2021 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/

#pragma once

#include "coreneuron/nrnconf.h"
#include "coreneuron/mechanism/membfunc.hpp"
#include "coreneuron/utils/memory.h"
#include "coreneuron/mpi/nrnmpi.h"
#include "coreneuron/mpi/core/nrnmpi.hpp"
#include "coreneuron/io/reports/nrnreport.hpp"
#include
#include

namespace coreneuron {
class NetCon;
class PreSyn;

extern bool use_solve_interleave;

/*
   Point_process._presyn, used only if its NET_RECEIVE sends a net_event, is
   eliminated. Needed only by net_event function. Replaced by
   PreSyn* = nt->presyns + nt->pnt2presyn_ix[pnttype2presyn[pnt->_type]][pnt->_i_instance];
*/

/* Singly linked list node holding one mechanism of a thread. */
struct NrnThreadMembList { /* patterned after CvMembList in cvodeobj.h */
    NrnThreadMembList* next;
    Memb_list* ml;
    int index;
    int* dependencies; /* list of mechanism types that this mechanism depends on*/
    int ndependencies; /* for scheduling we need to know the dependency count */
};

NrnThreadMembList* create_tml(NrnThread& nt,
                              int mech_id,
                              Memb_func& memb_func,
                              int& shadow_rhs_cnt,
                              const std::vector& mech_types,
                              const std::vector& nodecounts);

/* Singly linked list node of the per-thread BEFORE/AFTER tables (NrnThread::tbl). */
struct NrnThreadBAList {
    Memb_list* ml; /* an item in the NrnThreadMembList */
    BAMech* bam;
    NrnThreadBAList* next;
};

/* Buffers of the fast membrane current calculation, one entry per node. */
struct NrnFastImem {
    double* nrn_sav_rhs;
    double* nrn_sav_d;
};

/* Description of the values a thread reports back to NEURON. */
struct TrajectoryRequests {
    void** vpr;       /* PlayRecord Objects known by NEURON */
    double** scatter; /* if bsize == 0, each time step */
    double** varrays; /* if bsize > 0, the Vector data pointers. */
    double** gather;  /* pointers to values that get scattered to NEURON */
    int n_pr;         /* number of PlayRecord instances */
    int n_trajec;     /* number of trajectories requested */
    int bsize;        /* buffer size of the Vector data */
    int vsize;        /* number of elements in varrays so far */
};

/* for OpenACC, in order to avoid an error while update PreSyn, with virtual base
 * class, we are adding helper with flag variable which could be updated on GPU
 */
struct PreSynHelper {
    int flag_;
};

/* All per-thread simulation state: time, mechanisms, network objects, the
 * node arrays and the GPU / reporting bookkeeping.
 */
struct NrnThread: public MemoryManaged {
    double _t = 0;
    double _dt = -1e9;
    double cj = 0.0;

    NrnThreadMembList* tml = nullptr;
    Memb_list** _ml_list = nullptr;
    Point_process* pntprocs = nullptr;  // synapses and artificial cells with and without gid
    PreSyn* presyns = nullptr;          // all the output PreSyn with and without gid
    PreSynHelper* presyns_helper = nullptr;
    int** pnt2presyn_ix = nullptr;  // eliminates Point_process._presyn used only by net_event
                                    // sender.
    NetCon* netcons = nullptr;
    double* weights = nullptr;  // size n_weight. NetCon.weight_ points into this array.

    int n_pntproc = 0;
    int n_weight = 0;
    int n_netcon = 0;
    int n_input_presyn = 0;
    int n_presyn = 0;       // only for model_size
    int n_real_output = 0;  // for checking their thresholds.

    int ncell = 0; /* analogous to old rootnodecount */
    int end = 0;   /* 1 + position of last in v_node array. Now v_node_count. */
    int id = 0;    /* this is nrn_threads[id] */
    int _stop_stepping = 0;
    int n_vecplay = 0; /* number of instances of VecPlayContinuous */

    size_t _ndata = 0;
    size_t _nvdata = 0;
    size_t _nidata = 0;        /* sizes */
    double* _data = nullptr;   /* all the other double* and Datum to doubles point into here*/
    int* _idata = nullptr;     /* all the Datum to ints index into here */
    void** _vdata = nullptr;   /* all the Datum to pointers index into here */
    void** _vecplay = nullptr; /* array of instances of VecPlayContinuous */

    double* _actual_rhs = nullptr;
    double* _actual_d = nullptr;
    double* _actual_a = nullptr;
    double* _actual_b = nullptr;
    double* _actual_v = nullptr;
    double* _actual_area = nullptr;
    double* _actual_diam = nullptr; /* nullptr if no mechanism has dparam with diam semantics */
    double* _shadow_rhs = nullptr;  /* Not pointer into _data. Avoid race for multiple
                                       POINT_PROCESS in same compartment */
    double* _shadow_d = nullptr;    /* Not pointer into _data. Avoid race for multiple
                                       POINT_PROCESS in same compartment */

    /* Fast membrane current calculation struct */
    NrnFastImem* nrn_fast_imem = nullptr;

    int* _v_parent_index = nullptr;
    int* _permute = nullptr;
    char* _sp13mat = nullptr;              /* handle to general sparse matrix */
    Memb_list* _ecell_memb_list = nullptr; /* normally nullptr */

    double _ctime = 0.0; /* computation time in seconds (using nrnmpi_wtime) */

    NrnThreadBAList* tbl[BEFORE_AFTER_SIZE]; /* wasteful since almost all empty */

    int shadow_rhs_cnt = 0; /* added to facilitate the NrnThread transfer to GPU */
    int compute_gpu = 0;    /* define whether to compute with gpus */
    int stream_id = 0;      /* define where the kernel will be launched on GPU stream */
    int _net_send_buffer_size = 0;
    int _net_send_buffer_cnt = 0;
    int* _net_send_buffer = nullptr;

    int* _watch_types = nullptr; /* nullptr or 0 terminated array of integers */
    void* mapping = nullptr;     /* section to segment mapping information */
    std::unique_ptr summation_report_handler_; /* report to ALU (values of the current
                                                  summation) */
    TrajectoryRequests* trajec_requests = nullptr; /* per time step values returned to NEURON */

    /* Needed in case there are FOR_NETCON statements in use. */
    std::size_t _fornetcon_perm_indices_size{}; /* length of _fornetcon_perm_indices */
    size_t* _fornetcon_perm_indices{};          /* displacement like list of indices */
    std::size_t _fornetcon_weight_perm_size{};  /* length of _fornetcon_weight_perm */
    size_t* _fornetcon_weight_perm{};           /* permutation indices into weight */

    std::vector _pnt_offset; /* for SelfEvent queue transfer */
};

extern void nrn_threads_create(int n);
extern int nrn_nthread;
extern NrnThread* nrn_threads;

/* Invoke `job(thread, args...)` once for every thread in nrn_threads, with the
 * loop distributed by the OpenMP pragma below.
 */
template
void nrn_multithread_job(F&& job, Args&&... args) {
    int i;
    // clang-format off
    #pragma omp parallel for private(i) shared(nrn_threads, job, nrn_nthread, \
                                           nrnmpi_myid) schedule(static, 1)
    // FIXME: multiple forwarding of the same arguments...
    for (i = 0; i < nrn_nthread; ++i) {
        job(nrn_threads + i, std::forward(args)...);
    }
    // clang-format on
}

extern void nrn_thread_table_check(void);
extern void nrn_threads_free(void);

extern bool _nrn_skip_initmodel;

extern void dt2thread(double);
extern void clear_event_queue(void);
extern void nrn_ba(NrnThread*, int);
extern void* nrn_fixed_step_lastpart(NrnThread*);
extern void nrn_solve_minimal(NrnThread*);
extern void nrncore2nrn_send_init();
extern void* setup_tree_matrix_minimal(NrnThread*);
extern void nrncore2nrn_send_values(NrnThread*);
extern void nrn_fixed_step_group_minimal(int total_sim_steps);
extern void nrn_fixed_single_steps_minimal(int total_sim_steps, double tstop);
extern void nrn_fixed_step_minimal(void);
extern void nrn_finitialize(int setv, double v);
extern void direct_mode_initialize();
extern void nrn_mk_table_check(void);
extern void nonvint(NrnThread* _nt);
extern void update(NrnThread*);

/* Return 1 when `te`, shifted down by 1e-11, lies in the interval
 * (nt->_t - nt->_dt, nt->_t]; otherwise return 0.
 */
constexpr int at_time(NrnThread* nt, double te) {
    double x = te - 1e-11;
    if (x <= nt->_t && x > (nt->_t - nt->_dt)) {
        return 1;
    }
    return 0;
}

}  // namespace coreneuron
================================================ FILE: coreneuron/sim/scopmath/abort.cpp ================================================ /****************************************************************************** * * File: abort.c * * Copyright (c) 1984, 1985, 1986, 1987, 1988, 1989, 1990 * Duke University * ******************************************************************************/ #include "coreneuron/utils/nrnoc_aux.hpp" /*----------------------------------------------------------------------------- * * ABORT_RUN() * * Prints out an error message and returns to the main menu if a solver * routine returns a nonzero error code. * * Calling sequence: abort_run(code) * * Argument: code int flag for error * * Returns: * * Functions called: abs(), cls(), cursrpos(), puts(), gets() * * Files accessed: *---------------------------------------------------------------------------*/ #include #include #include "errcodes.h" namespace coreneuron { int abort_run(int code) { switch ((code >= 0) ? 
code : -code) { case EXCEED_ITERS: puts("Convergence not achieved in maximum number of iterations"); break; case SINGULAR: puts("The matrix in the solution method is singular or ill-conditioned"); break; case PRECISION: puts( "The increment in the independent variable is less than machine " "roundoff error"); break; case CORR_FAIL: puts("The corrector failed to satisfy the error check"); break; case DIVERGED: puts("The corrector iteration diverged"); break; case INCONSISTENT: puts("Inconsistent boundary conditions"); puts("Convergence not acheived in maximum number of iterations"); break; case BAD_START: puts("Poor starting estimate for initial conditions"); puts("The matrix in the solution method is singular or ill-conditioned"); break; case NODATA: puts("No data found in data file"); break; case NO_SOLN: puts("No solution was obtained for the coefficients"); break; case LOWMEM: puts("Insufficient memory to run the model"); break; case DIVCHECK: puts("Attempt to divide by zero"); break; case NOFORCE: puts( "Could not open forcing function file\nThe model cannot be run " "without the forcing function"); break; case NEG_ARG: puts("Cannot compute factorial of negative argument"); break; case RANGE: puts( "Value of variable is outside the range of the forcing function data " "table"); break; default: puts("Origin of error is unknown"); } hoc_execerror("scopmath library error", (char*) 0); return 0; } } // namespace coreneuron ================================================ FILE: coreneuron/sim/scopmath/crout_thread.hpp ================================================ /* # ============================================================================= # Originally crout.c from SCoP library, Copyright (c) 1987-90 Duke University # ============================================================================= # Subsequent extensive prototype and memory layout changes for CoreNEURON # # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for 
details.
# =============================================================================.
*/
#pragma once
#include "coreneuron/sim/scopmath/errcodes.h"
#include "coreneuron/sim/scopmath/newton_struct.h"

namespace coreneuron {
#if defined(scopmath_crout_ix) || defined(scopmath_crout_y) || defined(scopmath_crout_b)
#error "naming clash on crout_thread.hpp-internal macros"
#endif
// Helper macros; every index is scaled by _STRIDE. They are #undef'd again at
// the end of this header.
#define scopmath_crout_b(arg)  b[scopmath_crout_ix(arg)]
#define scopmath_crout_ix(arg) ((arg) *_STRIDE)
#define scopmath_crout_y(arg)  _p[y[arg] * _STRIDE]

/**
 * Performs an LU triangular factorization of a real matrix by the Crout
 * algorithm using partial pivoting. Rows are not normalized; implicit
 * equilibration is used. ROUNDOFF is the minimal value for a pivot element
 * without its being considered too close to zero (currently set to 1.0E-20).
 *
 * @return 0 if no error; 2 if matrix is singular or ill-conditioned
 * @param ns provides the `rowmax` scratch vector, which is overwritten
 * @param n number of rows of the matrix
 * @param a double precision matrix to be factored
 * @param[out] a factors required to transform the constant vector in the set of
 *               simultaneous equations are stored in the lower triangle;
 *               factors for back substitution are stored in the upper triangle.
 * @param[out] perm permutation vector to store row interchanges
 *
 * @note Having a different permutation per instance may not be a good idea.
 */
inline int nrn_crout_thread(NewtonSpace* ns, int n, double** a, int* perm, _threadargsproto_) {
    // position in perm of the most recently selected pivot candidate
    int save_i = 0;

    /* Initialize permutation and rowmax vectors */
    double* rowmax = ns->rowmax;
    for (int i = 0; i < n; i++) {
        perm[scopmath_crout_ix(i)] = i;
        int k = 0;
        for (int j = 1; j < n; j++)
            if (fabs(a[i][scopmath_crout_ix(j)]) > fabs(a[i][scopmath_crout_ix(k)]))
                k = j;
        // the signed entry of largest magnitude is stored; only its magnitude
        // matters below because the ratios are passed through fabs()
        rowmax[scopmath_crout_ix(i)] = a[i][scopmath_crout_ix(k)];
    }

    /* Loop over rows and columns r */
    for (int r = 0; r < n; r++) {
        /*
         * Operate on rth column.  This produces the lower triangular matrix
         * of terms needed to transform the constant vector.
         */
        for (int i = r; i < n; i++) {
            double sum = 0.0;
            int irow = perm[scopmath_crout_ix(i)];
            for (int k = 0; k < r; k++) {
                int krow = perm[scopmath_crout_ix(k)];
                sum += a[irow][scopmath_crout_ix(k)] * a[krow][scopmath_crout_ix(r)];
            }
            a[irow][scopmath_crout_ix(r)] -= sum;
        }

        /* Find row containing the pivot in the rth column */
        int pivot = perm[scopmath_crout_ix(r)];
        double equil_1 = fabs(a[pivot][scopmath_crout_ix(r)] / rowmax[scopmath_crout_ix(pivot)]);
        for (int i = r + 1; i < n; i++) {
            int irow = perm[scopmath_crout_ix(i)];
            double equil_2 = fabs(a[irow][scopmath_crout_ix(r)] / rowmax[scopmath_crout_ix(irow)]);
            if (equil_2 > equil_1) {
                /* make irow the new pivot row */
                pivot = irow;
                save_i = i;
                equil_1 = equil_2;
            }
        }

        /* Interchange entries in permutation vector if necessary */
        if (pivot != perm[scopmath_crout_ix(r)]) {
            perm[scopmath_crout_ix(save_i)] = perm[scopmath_crout_ix(r)];
            perm[scopmath_crout_ix(r)] = pivot;
        }

        /* Check that pivot element is not too small */
        if (fabs(a[pivot][scopmath_crout_ix(r)]) < ROUNDOFF)
            return SINGULAR;

        /*
         * Operate on row in rth position.  This produces the upper
         * triangular matrix whose diagonal elements are assumed to be unity.
         * This matrix is used in the back substitution algorithm.
         */
        for (int j = r + 1; j < n; j++) {
            double sum = 0.0;
            for (int k = 0; k < r; k++) {
                int krow = perm[scopmath_crout_ix(k)];
                sum += a[pivot][scopmath_crout_ix(k)] * a[krow][scopmath_crout_ix(j)];
            }
            a[pivot][scopmath_crout_ix(j)] = (a[pivot][scopmath_crout_ix(j)] - sum) /
                                             a[pivot][scopmath_crout_ix(r)];
        }
    }
    return SUCCESS;
}

/**
 * Performs forward substitution algorithm to transform the constant vector in
 * the linear simultaneous equations to be consistent with the factored matrix.
 * Then performs back substitution to find the solution to the simultaneous
 * linear equations.
*
 * @param n number of rows of the matrix
 * @param a double precision matrix containing the factored matrix of
 *          coefficients of the linear equations
 * @param b vector of function values
 * @param perm permutation vector to store row interchanges
 * @param[out] p[y[i]] contains the solution vector
 *
 * @note The branch that stores the solution through `y` is currently compiled
 *       out with `if (0)`; the active branch writes the solution directly into
 *       `p[i * _STRIDE]`.
 */
inline void nrn_scopmath_solve_thread(int n,
                                      double** a,
                                      double* b,
                                      int* perm,
                                      double* p,
                                      int* y,
                                      _threadargsproto_) {
    /* Perform forward substitution with pivoting */
    // if (y) { // pgacc bug. nullptr on cpu but not on GPU
    if (0) {
        for (int i = 0; i < n; i++) {
            int pivot = perm[scopmath_crout_ix(i)];
            double sum = 0.0;
            for (int j = 0; j < i; j++)
                sum += a[pivot][scopmath_crout_ix(j)] * (scopmath_crout_y(j));
            scopmath_crout_y(i) = (scopmath_crout_b(pivot) - sum) / a[pivot][scopmath_crout_ix(i)];
        }

        /*
         * Note that the y vector is already in the correct order for back
         * substitution.  Perform back substitution, pivoting the matrix but not
         * the y vector.  There is no need to divide by the diagonal element as
         * this is assumed to be unity.
         */
        for (int i = n - 1; i >= 0; i--) {
            int pivot = perm[scopmath_crout_ix(i)];
            double sum = 0.0;
            for (int j = i + 1; j < n; j++)
                sum += a[pivot][scopmath_crout_ix(j)] * (scopmath_crout_y(j));
            scopmath_crout_y(i) -= sum;
        }
    } else {
        /* Same algorithm as above, but the intermediate and final values are
         * kept in p itself instead of being addressed through y. */
        for (int i = 0; i < n; i++) {
            int pivot = perm[scopmath_crout_ix(i)];
            double sum = 0.0;
            if (i > 0) {  // pgacc bug. with i==0 the following loop executes once
                for (int j = 0; j < i; j++) {
                    sum += a[pivot][scopmath_crout_ix(j)] * (p[scopmath_crout_ix(j)]);
                }
            }
            p[scopmath_crout_ix(i)] = (scopmath_crout_b(pivot) - sum) /
                                      a[pivot][scopmath_crout_ix(i)];
        }

        /*
         * Note that the p vector is already in the correct order for back
         * substitution.  Perform back substitution, pivoting the matrix but not
         * the p vector.  There is no need to divide by the diagonal element as
         * this is assumed to be unity.
         */
        for (int i = n - 1; i >= 0; i--) {
            int pivot = perm[scopmath_crout_ix(i)];
            double sum = 0.0;
            for (int j = i + 1; j < n; j++)
                sum += a[pivot][scopmath_crout_ix(j)] * (p[scopmath_crout_ix(j)]);
            p[scopmath_crout_ix(i)] -= sum;
        }
    }
}
#undef scopmath_crout_b
#undef scopmath_crout_ix
#undef scopmath_crout_y
}  // namespace coreneuron

================================================
FILE: coreneuron/sim/scopmath/errcodes.h
================================================
/*
# =============================================================================
# Originally errcodes.h from SCoP library, Copyright (c) 1984-90 Duke University
# =============================================================================
# Subsequent extensive prototype and memory layout changes for CoreNEURON
#
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# =============================================================================.
*/
#pragma once
namespace coreneuron {
extern int abort_run(int);
namespace scopmath {
/** @brief Flag to disable some code sections at compile time.
 *
 *  Some methods, such as coreneuron::scopmath::sparse::getelm(...), decide at
 *  runtime whether they are simply accessors, or if they dynamically modify the
 *  matrix in question, possibly allocating new memory. Typically the second
 *  mode will be used during model initialisation, while the first will be used
 *  during computation/simulation. Compiling the more complicated code for the
 *  second mode can be problematic for targets such as GPU, where dynamic
 *  allocation and global state are complex. This enum is intended to be used as
 *  a template parameter to flag (at compile time) when this code can be
 *  omitted.
*/ enum struct enabled_code { all, compute_only }; } // namespace scopmath } // namespace coreneuron #define ROUNDOFF 1.e-20 #define ZERO 1.e-8 #define STEP 1.e-6 #define CONVERGE 1.e-6 #define MAXCHANGE 0.05 #define INITSIMPLEX 0.25 #define MAXITERS 50 #define MAXSMPLXITERS 100 #define MAXSTEPS 20 #define MAXHALVE 15 #define MAXORDER 6 #define MAXTERMS 3 #define MAXFAIL 10 #define MAX_JAC_ITERS 20 #define MAX_GOOD_ORDER 2 #define MAX_GOOD_STEPS 3 #define SUCCESS 0 #define EXCEED_ITERS 1 #define SINGULAR 2 #define PRECISION 3 #define CORR_FAIL 4 #define INCONSISTENT 5 #define BAD_START 6 #define NODATA 7 #define NO_SOLN 8 #define LOWMEM 9 #define DIVCHECK 10 #define NOFORCE 11 #define DIVERGED 12 #define NEG_ARG 13 #define RANGE 14 ================================================ FILE: coreneuron/sim/scopmath/newton_struct.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/mechanism/mech/mod2c_core_thread.hpp" namespace coreneuron { /* avoid incessant alloc/free memory */ struct NewtonSpace { int n; int n_instance; double* delta_x; double** jacobian; int* perm; double* high_value; double* low_value; double* rowmax; }; void nrn_newtonspace_copyto_device(NewtonSpace* ns); void nrn_newtonspace_delete_from_device(NewtonSpace* ns); } // namespace coreneuron ================================================ FILE: coreneuron/sim/scopmath/newton_thread.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= */ #include #include #include "coreneuron/sim/scopmath/newton_thread.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" namespace coreneuron { NewtonSpace* nrn_cons_newtonspace(int n, int n_instance) { NewtonSpace* ns = (NewtonSpace*) emalloc(sizeof(NewtonSpace)); ns->n = n; ns->n_instance = n_instance; ns->delta_x = makevector(n * n_instance * sizeof(double)); ns->jacobian = makematrix(n, n * n_instance); ns->perm = (int*) emalloc((unsigned) (n * n_instance * sizeof(int))); ns->high_value = makevector(n * n_instance * sizeof(double)); ns->low_value = makevector(n * n_instance * sizeof(double)); ns->rowmax = makevector(n * n_instance * sizeof(double)); nrn_newtonspace_copyto_device(ns); return ns; } void nrn_destroy_newtonspace(NewtonSpace* ns) { nrn_newtonspace_delete_from_device(ns); free((char*) ns->perm); freevector(ns->delta_x); freematrix(ns->jacobian); freevector(ns->high_value); freevector(ns->low_value); freevector(ns->rowmax); free((char*) ns); } } // namespace coreneuron ================================================ FILE: coreneuron/sim/scopmath/newton_thread.hpp ================================================ /* # ============================================================================= # Originally newton.c from SCoP library, Copyright (c) 1987-90 Duke University # ============================================================================= # Subsequent extensive prototype and memory layout changes for CoreNEURON # # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include "coreneuron/sim/scopmath/errcodes.h" #include "coreneuron/sim/scopmath/newton_struct.h" #include "coreneuron/sim/scopmath/crout_thread.hpp" #include #include namespace coreneuron { #if defined(scopmath_newton_ix) || defined(scopmath_newton_s) || defined(scopmath_newton_x) #error "naming clash on newton_thread.hpp-internal macros" #endif #define scopmath_newton_ix(arg) ((arg) *_STRIDE) #define scopmath_newton_s(arg) _p[s[arg] * _STRIDE] #define scopmath_newton_x(arg) _p[(arg) *_STRIDE] namespace detail { /** * @brief Calculate the Jacobian matrix using finite central differences. * * Creates the Jacobian matrix by computing partial derivatives by finite * central differences. If the column variable is nonzero, an increment of 2% of * the variable is used. STEP is the minimum increment allowed; it is currently * set to 1.0E-6. * * @param n number of variables * @param x pointer to array of addresses of the solution vector elements * @param p array of parameter values * @param func callable that computes the deviation from zero of each equation * in the model * @param value pointer to array of addresses of function values * @param[out] jacobian computed jacobian matrix */ template void nrn_buildjacobian_thread(NewtonSpace* ns, int n, int* index, F const& func, double* value, double** jacobian, _threadargsproto_) { double* high_value = ns->high_value; double* low_value = ns->low_value; /* Compute partial derivatives by central finite differences */ for (int j = 0; j < n; j++) { double increment = std::max(std::fabs(0.02 * (scopmath_newton_x(index[j]))), STEP); scopmath_newton_x(index[j]) += increment; func(_threadargs_); // std::invoke in C++17 for (int i = 0; i < n; i++) high_value[scopmath_newton_ix(i)] = value[scopmath_newton_ix(i)]; scopmath_newton_x(index[j]) -= 2.0 * increment; func(_threadargs_); // std::invoke in C++17 for (int i = 0; i < n; i++) { low_value[scopmath_newton_ix(i)] = value[scopmath_newton_ix(i)]; /* Insert partials into 
jth column of Jacobian matrix */ jacobian[i][scopmath_newton_ix(j)] = (high_value[scopmath_newton_ix(i)] - low_value[scopmath_newton_ix(i)]) / (2.0 * increment); } /* Restore original variable and function values. */ scopmath_newton_x(index[j]) += increment; func(_threadargs_); // std::invoke in C++17 } } #undef scopmath_newton_x } // namespace detail /** * Iteratively solves simultaneous nonlinear equations by Newton's method, using * a Jacobian matrix computed by finite differences. * * @return 0 if no error; 2 if matrix is singular or ill-conditioned; 1 if * maximum iterations exceeded. * @param n number of variables to solve for * @param x pointer to array of the solution vector elements possibly indexed by * index * @param p array of parameter values * @param func callable that computes the deviation from zero of each equation * in the model * @param value pointer to array to array of the function values * @param[out] x contains the solution value or the most recent iteration's * result in the event of an error. 
*/ template inline int nrn_newton_thread(NewtonSpace* ns, int n, int* s, F func, double* value, _threadargsproto_) { int count = 0, error = 0; double change = 1.0, max_dev, temp; int done = 0; /* * Create arrays for Jacobian, variable increments, function values, and * permutation vector */ double* delta_x = ns->delta_x; double** jacobian = ns->jacobian; int* perm = ns->perm; /* Iteration loop */ while (!done) { if (count++ >= MAXITERS) { error = EXCEED_ITERS; done = 2; } if (!done && change > MAXCHANGE) { /* * Recalculate Jacobian matrix if solution has changed by more * than MAXCHANGE */ detail::nrn_buildjacobian_thread(ns, n, s, func, value, jacobian, _threadargs_); for (int i = 0; i < n; i++) value[scopmath_newton_ix(i)] = -value[scopmath_newton_ix(i)]; /* Required correction * to * function values */ error = nrn_crout_thread(ns, n, jacobian, perm, _threadargs_); if (error != SUCCESS) { done = 2; } } if (!done) { nrn_scopmath_solve_thread(n, jacobian, value, perm, delta_x, (int*) 0, _threadargs_); /* Update solution vector and compute norms of delta_x and value */ change = 0.0; if (s) { for (int i = 0; i < n; i++) { if (std::fabs(scopmath_newton_s(i)) > ZERO && (temp = std::fabs(delta_x[scopmath_newton_ix(i)] / (scopmath_newton_s(i)))) > change) change = temp; scopmath_newton_s(i) += delta_x[scopmath_newton_ix(i)]; } } else { for (int i = 0; i < n; i++) { if (std::fabs(scopmath_newton_s(i)) > ZERO && (temp = std::fabs(delta_x[scopmath_newton_ix(i)] / (scopmath_newton_s(i)))) > change) change = temp; scopmath_newton_s(i) += delta_x[scopmath_newton_ix(i)]; } } // Evaulate function values with new solution. 
func(_threadargs_); // std::invoke in C++17 max_dev = 0.0; for (int i = 0; i < n; i++) { value[scopmath_newton_ix(i)] = -value[scopmath_newton_ix(i)]; /* Required correction * to function * values */ if ((temp = std::fabs(value[scopmath_newton_ix(i)])) > max_dev) max_dev = temp; } /* Check for convergence or maximum iterations */ if (change <= CONVERGE && max_dev <= ZERO) { // break; done = 1; } } } /* end of while loop */ return (error); } #undef scopmath_newton_ix #undef scopmath_newton_s NewtonSpace* nrn_cons_newtonspace(int n, int n_instance); void nrn_destroy_newtonspace(NewtonSpace* ns); } // namespace coreneuron ================================================ FILE: coreneuron/sim/scopmath/sparse_thread.hpp ================================================ /* # ============================================================================= # Originally sparse.c from SCoP library, Copyright (c) 1989-90 Duke University # ============================================================================= # Subsequent extensive prototype and memory layout changes for CoreNEURON # # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include "coreneuron/mechanism/mech/mod2c_core_thread.hpp" #include "coreneuron/sim/scopmath/errcodes.h" namespace coreneuron { namespace scopmath { namespace sparse { // Methods that may be called from offloaded regions are declared inline. 
inline void delete_item(Item* item) { item->next->prev = item->prev; item->prev->next = item->next; item->prev = nullptr; item->next = nullptr; } /*link ii before item*/ inline void linkitem(Item* item, Item* ii) { ii->prev = item->prev; ii->next = item; item->prev = ii; ii->prev->next = ii; } inline void insert(SparseObj* so, Item* item) { Item* ii{}; for (ii = so->orderlist->next; ii != so->orderlist; ii = ii->next) { if (ii->norder >= item->norder) { break; } } linkitem(ii, item); } /* note: solution order refers to the following diag[varord[row]]->row = row = diag[varord[row]]->col rowst[varord[row]]->row = row varord[el->row] < varord[el->c_right->row] varord[el->col] < varord[el->r_down->col] */ inline void increase_order(SparseObj* so, unsigned row) { /* order of row increases by 1. Maintain the orderlist. */ if (!so->do_flag) return; Item* order = so->roworder[row]; delete_item(order); order->norder++; insert(so, order); } /** * Return pointer to (row, col) element maintaining order in rows. * * See check_assert in minorder for info about how this matrix is supposed to * look. If new_elem is nonzero and an element would otherwise be created, new * is used instead. This is because linking an element is highly nontrivial. The * biggest difference is that elements are no longer removed and this saves much * time allocating and freeing during the solve phase. 
*/ template Elm* getelm(SparseObj* so, unsigned row, unsigned col, Elm* new_elem) { Elm *el, *elnext; unsigned vrow = so->varord[row]; unsigned vcol = so->varord[col]; if (vrow == vcol) { return so->diag[vrow]; /* a common case */ } if (vrow > vcol) { /* in the lower triangle */ /* search downward from diag[vcol] */ for (el = so->diag[vcol];; el = elnext) { elnext = el->r_down; if (!elnext) { break; } else if (elnext->row == row) { /* found it */ return elnext; } else if (so->varord[elnext->row] > vrow) { break; } } /* insert below el */ if (!new_elem) { if constexpr (code_to_enable == enabled_code::compute_only) { // Dynamic allocation should not happen during the compute phase. assert(false); } else { new_elem = new Elm{}; new_elem->value = new double[so->_cntml_padded]; increase_order(so, row); } } new_elem->r_down = el->r_down; el->r_down = new_elem; new_elem->r_up = el; if (new_elem->r_down) { new_elem->r_down->r_up = new_elem; } /* search leftward from diag[vrow] */ for (el = so->diag[vrow];; el = elnext) { elnext = el->c_left; if (!elnext) { break; } else if (so->varord[elnext->col] < vcol) { break; } } /* insert to left of el */ new_elem->c_left = el->c_left; el->c_left = new_elem; new_elem->c_right = el; if (new_elem->c_left) { new_elem->c_left->c_right = new_elem; } else { so->rowst[vrow] = new_elem; } } else { /* in the upper triangle */ /* search upward from diag[vcol] */ for (el = so->diag[vcol];; el = elnext) { elnext = el->r_up; if (!elnext) { break; } else if (elnext->row == row) { /* found it */ return elnext; } else if (so->varord[elnext->row] < vrow) { break; } } /* insert above el */ if (!new_elem) { if constexpr (code_to_enable == enabled_code::compute_only) { assert(false); } else { new_elem = new Elm{}; new_elem->value = new double[so->_cntml_padded]; increase_order(so, row); } } new_elem->r_up = el->r_up; el->r_up = new_elem; new_elem->r_down = el; if (new_elem->r_up) { new_elem->r_up->r_down = new_elem; } /* search right from diag[vrow] */ 
for (el = so->diag[vrow];; el = elnext) { elnext = el->c_right; if (!elnext) { break; } else if (so->varord[elnext->col] > vcol) { break; } } /* insert to right of el */ new_elem->c_right = el->c_right; el->c_right = new_elem; new_elem->c_left = el; if (new_elem->c_right) { new_elem->c_right->c_left = new_elem; } } new_elem->row = row; new_elem->col = col; return new_elem; } /** * The following routines support the concept of a list. Modified from modl. The * list is a doubly linked list. A special item with element 0 is always at the * tail of the list and is denoted as the List pointer itself. list->next point * to the first item in the list and list->prev points to the last item in the * list. i.e. the list is circular. Note that in an empty list next and prev * points to itself. * * It is intended that this implementation be hidden from the user via the * following function calls. */ inline List* newlist() { auto* ii = new Item{}; ii->prev = ii; ii->next = ii; return ii; } /*free the list but not the elements*/ inline void freelist(List* list) { Item* i2; for (Item* i1 = list->next; i1 != list; i1 = i2) { i2 = i1->next; delete i1; } delete list; } inline void check_assert(SparseObj* so) { /* check that all links are consistent */ for (unsigned i = 1; i <= so->neqn; i++) { assert(so->diag[i]); assert(so->diag[i]->row == so->diag[i]->col); assert(so->varord[so->diag[i]->row] == i); assert(so->rowst[i]->row == so->diag[i]->row); for (Elm* el = so->rowst[i]; el; el = el->c_right) { if (el == so->rowst[i]) { assert(el->c_left == nullptr); } else { assert(el->c_left->c_right == el); assert(so->varord[el->c_left->col] < so->varord[el->col]); } } for (Elm* el = so->diag[i]->r_down; el; el = el->r_down) { assert(el->r_up->r_down == el); assert(so->varord[el->r_up->row] < so->varord[el->row]); } for (Elm* el = so->diag[i]->r_up; el; el = el->r_up) { assert(el->r_down->r_up == el); assert(so->varord[el->r_down->row] > so->varord[el->row]); } } } /* at this point row links 
are out of order for diag[i]->col and col links are out of order for diag[i]->row */ inline void re_link(SparseObj* so, unsigned i) { for (Elm* el = so->rowst[i]; el; el = el->c_right) { /* repair hole */ if (el->r_up) el->r_up->r_down = el->r_down; if (el->r_down) el->r_down->r_up = el->r_up; } for (Elm* el = so->diag[i]->r_down; el; el = el->r_down) { /* repair hole */ if (el->c_right) el->c_right->c_left = el->c_left; if (el->c_left) el->c_left->c_right = el->c_right; else so->rowst[so->varord[el->row]] = el->c_right; } for (Elm* el = so->diag[i]->r_up; el; el = el->r_up) { /* repair hole */ if (el->c_right) el->c_right->c_left = el->c_left; if (el->c_left) el->c_left->c_right = el->c_right; else so->rowst[so->varord[el->row]] = el->c_right; } /* matrix is consistent except that diagonal row elements are unlinked from their columns and the diagonal column elements are unlinked from their rows. For simplicity discard all knowledge of links and use getelm to relink */ Elm *dright, *dleft, *dup, *ddown, *elnext; so->rowst[i] = so->diag[i]; dright = so->diag[i]->c_right; dleft = so->diag[i]->c_left; dup = so->diag[i]->r_up; ddown = so->diag[i]->r_down; so->diag[i]->c_right = so->diag[i]->c_left = nullptr; so->diag[i]->r_up = so->diag[i]->r_down = nullptr; for (Elm* el = dright; el; el = elnext) { elnext = el->c_right; getelm(so, el->row, el->col, el); } for (Elm* el = dleft; el; el = elnext) { elnext = el->c_left; getelm(so, el->row, el->col, el); } for (Elm* el = dup; el; el = elnext) { elnext = el->r_up; getelm(so, el->row, el->col, el); } for (Elm* el = ddown; el; el = elnext) { elnext = el->r_down; getelm(so, el->row, el->col, el); } } inline void free_elm(SparseObj* so) { /* free all elements */ for (unsigned i = 1; i <= so->neqn; i++) { so->rowst[i] = nullptr; so->diag[i] = nullptr; } } inline void init_minorder(SparseObj* so) { /* matrix has been set up. Construct the orderlist and orderfind vector. 
*/ so->do_flag = 1; if (so->roworder) { for (unsigned i = 1; i <= so->nroworder; ++i) { delete so->roworder[i]; } delete[] so->roworder; } so->roworder = new Item* [so->neqn + 1] {}; so->nroworder = so->neqn; if (so->orderlist) { freelist(so->orderlist); } so->orderlist = newlist(); for (unsigned i = 1; i <= so->neqn; i++) { so->roworder[i] = new Item{}; } for (unsigned i = 1; i <= so->neqn; i++) { unsigned j = 0; for (auto el = so->rowst[i]; el; el = el->c_right) { j++; } so->roworder[so->diag[i]->row]->elm = so->diag[i]; so->roworder[so->diag[i]->row]->norder = j; insert(so, so->roworder[so->diag[i]->row]); } } inline void reduce_order(SparseObj* so, unsigned row) { /* order of row decreases by 1. Maintain the orderlist. */ if (!so->do_flag) return; Item* order = so->roworder[row]; delete_item(order); order->norder--; insert(so, order); } inline void get_next_pivot(SparseObj* so, unsigned i) { /* get varord[i], etc. from the head of the orderlist. */ Item* order = so->orderlist->next; assert(order != so->orderlist); unsigned j; if ((j = so->varord[order->elm->row]) != i) { /* push order lists down by 1 and put new diag in empty slot */ assert(j > i); Elm* el = so->rowst[j]; for (; j > i; j--) { so->diag[j] = so->diag[j - 1]; so->rowst[j] = so->rowst[j - 1]; so->varord[so->diag[j]->row] = j; } so->diag[i] = order->elm; so->rowst[i] = el; so->varord[so->diag[i]->row] = i; /* at this point row links are out of order for diag[i]->col and col links are out of order for diag[i]->row */ re_link(so, i); } /* now make sure all needed elements exist */ for (Elm* el = so->diag[i]->r_down; el; el = el->r_down) { for (Elm* pivot = so->diag[i]->c_right; pivot; pivot = pivot->c_right) { getelm(so, el->row, pivot->col, nullptr); } reduce_order(so, el->row); } delete_item(order); } /* reallocate space for matrix */ inline void initeqn(SparseObj* so, unsigned maxeqn) { if (maxeqn == so->neqn) return; free_elm(so); so->neqn = maxeqn; delete[] so->rowst; delete[] so->diag; delete[] 
so->varord; delete[] so->rhs; delete[] so->ngetcall; so->elmpool = nullptr; so->rowst = new Elm*[maxeqn + 1]; so->diag = new Elm*[maxeqn + 1]; so->varord = new unsigned[maxeqn + 1]; so->rhs = new double[(maxeqn + 1) * so->_cntml_padded]; so->ngetcall = new unsigned[so->_cntml_padded]; for (unsigned i = 1; i <= maxeqn; i++) { so->varord[i] = i; so->diag[i] = new Elm{}; so->diag[i]->value = new double[so->_cntml_padded]; so->rowst[i] = so->diag[i]; so->diag[i]->row = i; so->diag[i]->col = i; so->diag[i]->r_down = so->diag[i]->r_up = nullptr; so->diag[i]->c_right = so->diag[i]->c_left = nullptr; } unsigned nn = so->neqn * so->_cntml_padded; for (unsigned i = 0; i < nn; ++i) { so->rhs[i] = 0.; } } /** * Minimum ordering algorithm to determine the order that the matrix should be * solved. Also make sure all needed elements are present. This does not mess up * the matrix. */ inline void spar_minorder(SparseObj* so) { check_assert(so); init_minorder(so); for (unsigned i = 1; i <= so->neqn; i++) { get_next_pivot(so, i); } so->do_flag = 0; check_assert(so); } inline void init_coef_list(SparseObj* so, int _iml) { so->ngetcall[_iml] = 0; for (unsigned i = 1; i <= so->neqn; i++) { for (Elm* el = so->rowst[i]; el; el = el->c_right) { el->value[_iml] = 0.; } } } #if defined(scopmath_sparse_d) || defined(scopmath_sparse_ix) || defined(scopmath_sparse_s) || \ defined(scopmath_sparse_x) #error "naming clash on sparse_thread.hpp-internal macros" #endif #define scopmath_sparse_ix(arg) ((arg) *_STRIDE) inline void subrow(SparseObj* so, Elm* pivot, Elm* rowsub, int _iml) { unsigned int const _cntml_padded{so->_cntml_padded}; double const r{rowsub->value[_iml] / pivot->value[_iml]}; so->rhs[scopmath_sparse_ix(rowsub->row)] -= so->rhs[scopmath_sparse_ix(pivot->row)] * r; so->numop++; for (auto el = pivot->c_right; el; el = el->c_right) { for (rowsub = rowsub->c_right; rowsub->col != el->col; rowsub = rowsub->c_right) { } rowsub->value[_iml] -= el->value[_iml] * r; so->numop++; } } inline 
void bksub(SparseObj* so, int _iml) { int _cntml_padded = so->_cntml_padded; for (unsigned i = so->neqn; i >= 1; i--) { for (Elm* el = so->diag[i]->c_right; el; el = el->c_right) { so->rhs[scopmath_sparse_ix(el->row)] -= el->value[_iml] * so->rhs[scopmath_sparse_ix(el->col)]; so->numop++; } so->rhs[scopmath_sparse_ix(so->diag[i]->row)] /= so->diag[i]->value[_iml]; so->numop++; } } inline int matsol(SparseObj* so, int _iml) { /* Upper triangularization */ so->numop = 0; for (unsigned i = 1; i <= so->neqn; i++) { Elm* pivot{so->diag[i]}; if (fabs(pivot->value[_iml]) <= ROUNDOFF) { return SINGULAR; } // Eliminate all elements in pivot column. The OpenACC annotation here // is to avoid problems with nvc++'s automatic paralellisation; see: // https://forums.developer.nvidia.com/t/device-kernel-hangs-at-o-and-above/212733 nrn_pragma_acc(loop seq) for (auto el = pivot->r_down; el; el = el->r_down) { subrow(so, pivot, el, _iml); } } bksub(so, _iml); return SUCCESS; } template void create_coef_list(SparseObj* so, int n, SPFUN fun, _threadargsproto_) { initeqn(so, (unsigned) n); so->phase = 1; so->ngetcall[0] = 0; fun(so, so->rhs, _threadargs_); // std::invoke in C++17 if (so->coef_list) { free(so->coef_list); } so->coef_list_size = so->ngetcall[0]; so->coef_list = new double*[so->coef_list_size]; spar_minorder(so); so->phase = 2; so->ngetcall[0] = 0; fun(so, so->rhs, _threadargs_); // std::invoke in C++17 so->phase = 0; } template double* thread_getelm(SparseObj* so, int row, int col, int _iml) { if (!so->phase) { return so->coef_list[so->ngetcall[_iml]++]; } Elm* el = scopmath::sparse::getelm(so, (unsigned) row, (unsigned) col, nullptr); if (so->phase == 1) { so->ngetcall[_iml]++; } else { so->coef_list[so->ngetcall[_iml]++] = el->value; } return el->value; } } // namespace sparse } // namespace scopmath // Methods that may be called from translated MOD files are kept outside the // scopmath::sparse namespace. 
#define scopmath_sparse_s(arg) _p[scopmath_sparse_ix(s[arg])] #define scopmath_sparse_d(arg) _p[scopmath_sparse_ix(d[arg])] /** * sparse matrix dynamic allocation: create_coef_list makes a list for fast * setup, does minimum ordering and ensures all elements needed are present. * This could easily be made recursive but it isn't right now. */ template void* nrn_cons_sparseobj(SPFUN fun, int n, Memb_list* ml, _threadargsproto_) { // fill in the unset _threadargsproto_ assuming _iml = 0; _iml = 0; /* from _threadargsproto_ */ _p = ml->data; _ppvar = ml->pdata; _v = _nt->_actual_v[ml->nodeindices[_iml]]; SparseObj* so{new SparseObj}; so->_cntml_padded = _cntml_padded; scopmath::sparse::create_coef_list(so, n, fun, _threadargs_); nrn_sparseobj_copyto_device(so); return so; } /** * This is an experimental numerical method for SCoP-3 which integrates kinetic * rate equations. It is intended to be used only by models generated by MODL, * and its identity is meant to be concealed from the user. * * @param n number of state variables * @param s array of pointers to the state variables * @param d array of pointers to the derivatives of states * @param t pointer to the independent variable * @param dt the time step * @param fun callable corresponding to the kinetic block equations * @param prhs pointer to right hand side vector (answer on return) does not * have to be allocated by caller. 
(this is no longer quite right) * @param linflag solve as linear equations, when nonlinear, all states are * forced >= 0 */ template int sparse_thread(SparseObj* so, int n, int* s, int* d, double* t, double dt, F fun, int linflag, _threadargsproto_) { int i, j, ierr; double err; for (i = 0; i < n; i++) { /*save old state*/ scopmath_sparse_d(i) = scopmath_sparse_s(i); } for (err = 1, j = 0; err > CONVERGE; j++) { scopmath::sparse::init_coef_list(so, _iml); fun(so, so->rhs, _threadargs_); // std::invoke in C++17 if ((ierr = scopmath::sparse::matsol(so, _iml))) { return ierr; } for (err = 0., i = 1; i <= n; i++) { /* why oh why did I write it from 1 */ scopmath_sparse_s(i - 1) += so->rhs[scopmath_sparse_ix(i)]; if (!linflag && scopmath_sparse_s(i - 1) < 0.) { scopmath_sparse_s(i - 1) = 0.; } err += fabs(so->rhs[scopmath_sparse_ix(i)]); } if (j > MAXSTEPS) { return EXCEED_ITERS; } if (linflag) break; } scopmath::sparse::init_coef_list(so, _iml); fun(so, so->rhs, _threadargs_); // std::invoke in C++17 for (i = 0; i < n; i++) { /*restore Dstate at t+dt*/ scopmath_sparse_d(i) = (scopmath_sparse_s(i) - scopmath_sparse_d(i)) / dt; } return SUCCESS; } #undef scopmath_sparse_d #undef scopmath_sparse_ix #undef scopmath_sparse_s #define scopmath_sparse_x(arg) _p[x[arg] * _STRIDE] /* for solving ax=b */ template int _cvode_sparse_thread(void** vpr, int n, int* x, SPFUN fun, _threadargsproto_) { SparseObj* so = (SparseObj*) (*vpr); if (!so) { so = new SparseObj{}; *vpr = so; } scopmath::sparse::create_coef_list(so, n, fun, _threadargs_); /* calls fun twice */ scopmath::sparse::init_coef_list(so, _iml); fun(so, so->rhs, _threadargs_); // std::invoke in C++17 int ierr; if ((ierr = scopmath::sparse::matsol(so, _iml))) { return ierr; } for (int i = 1; i <= n; i++) { /* why oh why did I write it from 1 */ scopmath_sparse_x(i - 1) = so->rhs[i]; } return SUCCESS; } #undef scopmath_sparse_x inline void _nrn_destroy_sparseobj_thread(SparseObj* so) { if (!so) { return; } 
nrn_sparseobj_delete_from_device(so); delete[] so->rowst; delete[] so->diag; delete[] so->varord; delete[] so->rhs; delete[] so->coef_list; if (so->roworder) { for (int ii = 1; ii <= so->nroworder; ++ii) { delete so->roworder[ii]; } delete[] so->roworder; } if (so->orderlist) { scopmath::sparse::freelist(so->orderlist); } delete so; } } // namespace coreneuron ================================================ FILE: coreneuron/sim/scopmath/ssimplic_thread.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include "coreneuron/mechanism/mech/mod2c_core_thread.hpp" namespace coreneuron { #if defined(scopmath_ssimplic_s) #error "naming clash on ssimplic_thread.hpp-internal macros" #endif #define scopmath_ssimplic_s(arg) _p[s[arg] * _STRIDE] static int check_state(int n, int* s, _threadargsproto_) { bool flag{true}; for (int i = 0; i < n; i++) { if (scopmath_ssimplic_s(i) < -1e-6) { scopmath_ssimplic_s(i) = 0.; flag = false; } } return flag; } #undef scopmath_ssimplic_s template int _ss_sparse_thread(SparseObj* so, int n, int* s, int* d, double* t, double dt, SPFUN fun, int linflag, _threadargsproto_) { int err; double ss_dt{1e9}; _nt->_dt = ss_dt; if (linflag) { /*iterate linear solution*/ err = sparse_thread(so, n, s, d, t, ss_dt, fun, 0, _threadargs_); } else { int ii{7}; err = 0; while (ii) { err = sparse_thread(so, n, s, d, t, ss_dt, fun, 1, _threadargs_); if (!err) { if (check_state(n, s, _threadargs_)) { err = sparse_thread(so, n, s, d, t, ss_dt, fun, 0, _threadargs_); } } --ii; if (!err) { ii = 0; } } } _nt->_dt = dt; return err; } template int _ss_derivimplicit_thread(int n, int* slist, int* dlist, DIFUN fun, _threadargsproto_) { double const dtsav{_nt->_dt}; _nt->_dt = 1e-9; int err = 
fun(_threadargs_); _nt->_dt = dtsav; return err; } } // namespace coreneuron ================================================ FILE: coreneuron/sim/solve_core.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/nrnconf.h" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/sim/multicore.hpp" namespace coreneuron { bool use_solve_interleave; static void triang(NrnThread*), bksub(NrnThread*); /* solve the matrix equation */ void nrn_solve_minimal(NrnThread* _nt) { if (use_solve_interleave) { solve_interleaved(_nt->id); } else { triang(_nt); bksub(_nt); } } /** @todo OpenACC GPU offload is sequential/slow. Because --cell-permute=0 and * --gpu is forbidden anyway, no OpenMP target offload equivalent is implemented. 
*/ /* triangularization of the matrix equations */ static void triang(NrnThread* _nt) { int i2 = _nt->ncell; int i3 = _nt->end; double* vec_a = &(VEC_A(0)); double* vec_b = &(VEC_B(0)); double* vec_d = &(VEC_D(0)); double* vec_rhs = &(VEC_RHS(0)); int* parent_index = _nt->_v_parent_index; nrn_pragma_acc(parallel loop seq present( vec_a [0:i3], vec_b [0:i3], vec_d [0:i3], vec_rhs [0:i3], parent_index [0:i3]) async(_nt->stream_id) if (_nt->compute_gpu)) nrn_pragma_omp(target if (_nt->compute_gpu)) for (int i = i3 - 1; i >= i2; --i) { double p = vec_a[i] / vec_d[i]; vec_d[parent_index[i]] -= p * vec_b[i]; vec_rhs[parent_index[i]] -= p * vec_rhs[i]; } } /* back substitution to finish solving the matrix equations */ static void bksub(NrnThread* _nt) { int i1 = 0; int i2 = i1 + _nt->ncell; int i3 = _nt->end; double* vec_b = &(VEC_B(0)); double* vec_d = &(VEC_D(0)); double* vec_rhs = &(VEC_RHS(0)); int* parent_index = _nt->_v_parent_index; nrn_pragma_acc(parallel loop seq present(vec_d [0:i2], vec_rhs [0:i2]) async(_nt->stream_id) if (_nt->compute_gpu)) nrn_pragma_omp(target if (_nt->compute_gpu)) for (int i = i1; i < i2; ++i) { vec_rhs[i] /= vec_d[i]; } nrn_pragma_acc( parallel loop seq present(vec_b [0:i3], vec_d [0:i3], vec_rhs [0:i3], parent_index [0:i3]) async(_nt->stream_id) if (_nt->compute_gpu)) nrn_pragma_omp(target if (_nt->compute_gpu)) for (int i = i2; i < i3; ++i) { vec_rhs[i] -= vec_b[i] * vec_rhs[parent_index[i]]; vec_rhs[i] /= vec_d[i]; } if (_nt->compute_gpu) { nrn_pragma_acc(wait(_nt->stream_id)) } } } // namespace coreneuron ================================================ FILE: coreneuron/sim/treeset_core.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/utils/profile/profiler_interface.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" namespace coreneuron { /* Fixed step method with threads and cache efficiency. No extracellular, sparse matrix, multisplit, or legacy features. */ static void nrn_rhs(NrnThread* _nt) { int i1 = 0; int i2 = i1 + _nt->ncell; int i3 = _nt->end; double* vec_rhs = &(VEC_RHS(0)); double* vec_d = &(VEC_D(0)); double* vec_a = &(VEC_A(0)); double* vec_b = &(VEC_B(0)); double* vec_v = &(VEC_V(0)); int* parent_index = _nt->_v_parent_index; nrn_pragma_acc(parallel loop present(vec_rhs [0:i3], vec_d [0:i3]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)) for (int i = i1; i < i3; ++i) { vec_rhs[i] = 0.; vec_d[i] = 0.; } if (_nt->nrn_fast_imem) { double* fast_imem_d = _nt->nrn_fast_imem->nrn_sav_d; double* fast_imem_rhs = _nt->nrn_fast_imem->nrn_sav_rhs; nrn_pragma_acc( parallel loop present(fast_imem_d [i1:i3], fast_imem_rhs [i1:i3]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)) for (int i = i1; i < i3; ++i) { fast_imem_d[i] = 0.; fast_imem_rhs[i] = 0.; } } nrn_ba(_nt, BEFORE_BREAKPOINT); /* note that CAP has no current */ for (auto tml = _nt->tml; tml; tml = tml->next) if (corenrn.get_memb_func(tml->index).current) { mod_f_t s = corenrn.get_memb_func(tml->index).current; std::string ss("cur-"); ss += nrn_get_mechname(tml->index); Instrumentor::phase p(ss.c_str()); (*s)(_nt, tml->ml, tml->index); #ifdef DEBUG if (errno) { hoc_warning("errno set during calculation of currents", nullptr); } #endif } if (_nt->nrn_fast_imem) { /* _nrn_save_rhs has only the contribution of electrode current so here we transform so it only has membrane current contribution */ double* p = _nt->nrn_fast_imem->nrn_sav_rhs; nrn_pragma_acc(parallel 
loop present(p, vec_rhs) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)) for (int i = i1; i < i3; ++i) { p[i] -= vec_rhs[i]; } } /* now the internal axial currents. The extracellular mechanism contribution is already done. rhs += ai_j*(vi_j - vi) */ nrn_pragma_acc(parallel loop present(vec_rhs [0:i3], vec_d [0:i3], vec_a [0:i3], vec_b [0:i3], vec_v [0:i3], parent_index [0:i3]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)) for (int i = i2; i < i3; ++i) { double dv = vec_v[parent_index[i]] - vec_v[i]; /* our connection coefficients are negative so */ nrn_pragma_acc(atomic update) nrn_pragma_omp(atomic update) vec_rhs[i] -= vec_b[i] * dv; nrn_pragma_acc(atomic update) nrn_pragma_omp(atomic update) vec_rhs[parent_index[i]] += vec_a[i] * dv; } } /* calculate left hand side of cm*dvm/dt = -i(vm) + is(vi) + ai_j*(vi_j - vi) cx*dvx/dt - cm*dvm/dt = -gx*(vx - ex) + i(vm) + ax_j*(vx_j - vx) with a matrix so that the solution is of the form [dvm+dvx,dvx] on the right hand side after solving. 
This is a common operation for fixed step, cvode, and daspk methods */ static void nrn_lhs(NrnThread* _nt) { int i1 = 0; int i2 = i1 + _nt->ncell; int i3 = _nt->end; /* note that CAP has no jacob */ for (auto tml = _nt->tml; tml; tml = tml->next) if (corenrn.get_memb_func(tml->index).jacob) { mod_f_t s = corenrn.get_memb_func(tml->index).jacob; std::string ss("cur-"); ss += nrn_get_mechname(tml->index); Instrumentor::phase p(ss.c_str()); (*s)(_nt, tml->ml, tml->index); #ifdef DEBUG if (errno) { hoc_warning("errno set during calculation of jacobian", (char*) 0); } #endif } /* now the cap current can be computed because any change to cm by another model has taken effect */ /* note, the first is CAP if there are any nodes*/ if (_nt->end && _nt->tml) { assert(_nt->tml->index == CAP); nrn_jacob_capacitance(_nt, _nt->tml->ml, _nt->tml->index); } double* vec_d = &(VEC_D(0)); double* vec_a = &(VEC_A(0)); double* vec_b = &(VEC_B(0)); int* parent_index = _nt->_v_parent_index; if (_nt->nrn_fast_imem) { /* _nrn_save_d has only the contribution of electrode current so here we transform so it only has membrane current contribution */ double* p = _nt->nrn_fast_imem->nrn_sav_d; nrn_pragma_acc(parallel loop present(p, vec_d) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)) for (int i = i1; i < i3; ++i) { p[i] += vec_d[i]; } } /* now add the axial currents */ nrn_pragma_acc(parallel loop present( vec_d [0:i3], vec_a [0:i3], vec_b [0:i3], parent_index [0:i3]) if (_nt->compute_gpu) async(_nt->stream_id)) nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu)) for (int i = i2; i < i3; ++i) { nrn_pragma_acc(atomic update) nrn_pragma_omp(atomic update) vec_d[i] -= vec_b[i]; nrn_pragma_acc(atomic update) nrn_pragma_omp(atomic update) vec_d[parent_index[i]] -= vec_a[i]; } } /* for the fixed step method */ void* setup_tree_matrix_minimal(NrnThread* _nt) { nrn_rhs(_nt); nrn_lhs(_nt); return nullptr; } } 
// namespace coreneuron ================================================ FILE: coreneuron/utils/ivocvect.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/utils/ivocvect.hpp" #include "coreneuron/utils/offload.hpp" namespace coreneuron { IvocVect* vector_new(int n) { return new IvocVect(n); } int vector_capacity(IvocVect* v) { return v->size(); } double* vector_vec(IvocVect* v) { return v->data(); } /* * Retro-compatibility implementations */ IvocVect* vector_new1(int n) { return new IvocVect(n); } nrn_pragma_acc(routine seq) int vector_capacity(void* v) { return ((IvocVect*) v)->size(); } nrn_pragma_acc(routine seq) double* vector_vec(void* v) { return ((IvocVect*) v)->data(); } } // namespace coreneuron ================================================ FILE: coreneuron/utils/ivocvect.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include "coreneuron/utils/offload.hpp" #include #include namespace coreneuron { template class fixed_vector { size_t n_; public: T* data_; /*making public for openacc copying */ fixed_vector() = default; fixed_vector(size_t n) : n_(n) { data_ = new T[n_]; } fixed_vector(const fixed_vector& vec) = delete; fixed_vector& operator=(const fixed_vector& vec) = delete; fixed_vector(fixed_vector&& vec) : n_{vec.n_} , data_{nullptr} { std::swap(data_, vec.data_); } fixed_vector& operator=(fixed_vector&& vec) { data_ = nullptr; std::swap(data_, vec.data_); n_ = vec.n_; return *this; } ~fixed_vector() { delete[] data_; } const T& operator[](int i) const { return data_[i]; } T& operator[](int i) { return data_[i]; } nrn_pragma_acc(routine seq) const T* data(void) const { return data_; } nrn_pragma_acc(routine seq) T* data(void) { return data_; } nrn_pragma_acc(routine seq) size_t size() const { return n_; } }; using IvocVect = fixed_vector; extern IvocVect* vector_new(int n); extern int vector_capacity(IvocVect* v); extern double* vector_vec(IvocVect* v); // retro-compatibility API extern IvocVect* vector_new1(int n); nrn_pragma_acc(routine seq) extern int vector_capacity(void* v); nrn_pragma_acc(routine seq) extern double* vector_vec(void* v); } // namespace coreneuron ================================================ FILE: coreneuron/utils/lpt.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include #include #include #include #include "coreneuron/nrnconf.h" // for size_t #include "coreneuron/utils/lpt.hpp" #include "coreneuron/utils/nrn_assert.h" using P = std::pair; // lpt Least Processing Time algorithm. // Largest piece goes into least size bag. 
// in: number of bags, vector of sizes // return: a new vector of bag indices parallel to the vector of sizes. std::vector lpt(std::size_t nbag, std::vector& pieces, double* bal) { nrn_assert(nbag > 0); nrn_assert(!pieces.empty()); std::vector

pvec; for (size_t i = 0; i < pieces.size(); ++i) { pvec.push_back(P(i, pieces[i])); } auto P_comp = [](const P& a, const P& b) { return a.second > b.second; }; std::sort(pvec.begin(), pvec.end(), P_comp); std::vector bagindices(pieces.size()); std::priority_queue, decltype(P_comp)> bagq(P_comp); for (size_t i = 0; i < nbag; ++i) { bagq.push(P(i, 0)); } for (const auto& p: pvec) { P bagqitem = bagq.top(); bagq.pop(); bagindices[p.first] = bagqitem.first; bagqitem.second += p.second; bagq.push(bagqitem); } // load balance average/max (1.0 is perfect) std::vector v(bagq.size()); for (size_t i = 1; i < nbag; ++i) { v[i] = bagq.top().second; bagq.pop(); } double b = load_balance(v); if (bal) { *bal = b; } else { printf("load balance = %g for %ld pieces in %ld bags\n", b, pieces.size(), nbag); } return bagindices; } double load_balance(std::vector& v) { nrn_assert(!v.empty()); std::size_t sum = std::accumulate(v.begin(), v.end(), 0); std::size_t max = *std::max_element(v.begin(), v.end()); return (double(sum) / v.size()) / max; } ================================================ FILE: coreneuron/utils/lpt.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once #include std::vector lpt(std::size_t nbag, std::vector& pieces, double* bal = nullptr); double load_balance(std::vector&); ================================================ FILE: coreneuron/utils/memory.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/utils/memory.h" #ifdef CORENEURON_ENABLE_GPU #include #endif #include namespace coreneuron { bool gpu_enabled() { #ifdef CORENEURON_ENABLE_GPU return corenrn_param.gpu; #else return false; #endif } void* allocate_unified(std::size_t num_bytes) { #ifdef CORENEURON_ENABLE_GPU // The build supports GPU execution, check if --gpu was passed to actually // enable it. We should not call CUDA APIs in GPU builds if --gpu was not passed. if (corenrn_param.gpu) { // Allocate managed/unified memory. void* ptr{nullptr}; auto const code = cudaMallocManaged(&ptr, num_bytes); assert(code == cudaSuccess); return ptr; } #endif // Either the build does not have GPU support or --gpu was not passed. // Allocate using standard operator new. // When we have C++17 support then propagate `alignment` here. return ::operator new(num_bytes); } void deallocate_unified(void* ptr, std::size_t num_bytes) { // See comments in allocate_unified to understand the different branches. #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { // Deallocate managed/unified memory. auto const code = cudaFree(ptr); assert(code == cudaSuccess); return; } #endif #ifdef __cpp_sized_deallocation ::operator delete(ptr, num_bytes); #else ::operator delete(ptr); #endif } } // namespace coreneuron ================================================ FILE: coreneuron/utils/memory.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include #include #include #include "coreneuron/utils/nrn_assert.h" #include "coreneuron/nrniv/nrniv_decl.h" #if !defined(NRN_SOA_BYTE_ALIGN) // for layout 0, every range variable array must be aligned by at least 16 bytes (the size of the // simd memory bus) #define NRN_SOA_BYTE_ALIGN (8 * sizeof(double)) #endif namespace coreneuron { /** * @brief Check if GPU support is enabled. * * This returns true if GPU support was enabled at compile time and at runtime * via coreneuron.gpu = True and/or --gpu, otherwise it returns false. */ bool gpu_enabled(); /** @brief Allocate unified memory in GPU builds iff GPU enabled, otherwise new */ void* allocate_unified(std::size_t num_bytes); /** @brief Deallocate memory allocated by `allocate_unified`. */ void deallocate_unified(void* ptr, std::size_t num_bytes); /** @brief C++ allocator that uses [de]allocate_unified. */ template struct unified_allocator { using value_type = T; unified_allocator() = default; template unified_allocator(unified_allocator const&) noexcept {} value_type* allocate(std::size_t n) { return static_cast(allocate_unified(n * sizeof(value_type))); } void deallocate(value_type* p, std::size_t n) noexcept { deallocate_unified(p, n * sizeof(value_type)); } }; template bool operator==(unified_allocator const&, unified_allocator const&) noexcept { return true; } template bool operator!=(unified_allocator const& x, unified_allocator const& y) noexcept { return !(x == y); } /** @brief Allocator-aware deleter for use with std::unique_ptr. * * This is copied from https://stackoverflow.com/a/23132307. See also * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0316r0.html, * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0211r3.html, and * boost::allocate_unique<...>. * Hopefully std::allocate_unique will be included in C++23. 
*/ template struct alloc_deleter { alloc_deleter() = default; // OL210813 addition alloc_deleter(const Alloc& a) : a(a) {} using pointer = typename std::allocator_traits::pointer; void operator()(pointer p) const { Alloc aa(a); std::allocator_traits::destroy(aa, std::addressof(*p)); std::allocator_traits::deallocate(aa, p, 1); } private: Alloc a; }; template auto allocate_unique(const Alloc& alloc, Args&&... args) { using AT = std::allocator_traits; static_assert(std::is_same>{}(), "Allocator has the wrong value_type"); Alloc a(alloc); auto p = AT::allocate(a, 1); try { AT::construct(a, std::addressof(*p), std::forward(args)...); using D = alloc_deleter; return std::unique_ptr(p, D(a)); } catch (...) { AT::deallocate(a, p, 1); throw; } } } // namespace coreneuron /// for gpu builds with unified memory support #ifdef CORENEURON_UNIFIED_MEMORY #include // TODO : error handling for CUDA routines inline void alloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) { cudaMallocManaged(&pointer, num_bytes); } inline void calloc_memory(void*& pointer, size_t num_bytes, size_t /*alignment*/) { alloc_memory(pointer, num_bytes, 64); cudaMemset(pointer, 0, num_bytes); } inline void free_memory(void* pointer) { cudaFree(pointer); } /** * A base class providing overloaded new and delete operators for CUDA allocation * * Classes that should be allocated on the GPU should inherit from this class. Additionally they * may need to implement a special copy-construtor. 
This is documented here: * \link: https://devblogs.nvidia.com/unified-memory-in-cuda-6/ */ class MemoryManaged { public: void* operator new(size_t len) { void* ptr; cudaMallocManaged(&ptr, len); cudaDeviceSynchronize(); return ptr; } void* operator new[](size_t len) { void* ptr; cudaMallocManaged(&ptr, len); cudaDeviceSynchronize(); return ptr; } void operator delete(void* ptr) { cudaDeviceSynchronize(); cudaFree(ptr); } void operator delete[](void* ptr) { cudaDeviceSynchronize(); cudaFree(ptr); } }; /// for cpu builds use posix memalign #else class MemoryManaged { // does nothing by default }; #include inline void alloc_memory(void*& pointer, size_t num_bytes, size_t alignment) { size_t fill = 0; if (alignment > 0) { if (num_bytes % alignment != 0) { size_t multiple = num_bytes / alignment; fill = alignment * (multiple + 1) - num_bytes; } nrn_assert((pointer = std::aligned_alloc(alignment, num_bytes + fill)) != nullptr); } else { nrn_assert((pointer = std::malloc(num_bytes)) != nullptr); } } inline void calloc_memory(void*& pointer, size_t num_bytes, size_t alignment) { alloc_memory(pointer, num_bytes, alignment); memset(pointer, 0, num_bytes); } inline void free_memory(void* pointer) { free(pointer); } #endif namespace coreneuron { /** Independent function to compute the needed chunkding, the chunk argument is the number of doubles the chunk is chunkded upon. */ template inline int soa_padded_size(int cnt, int layout) { int imod = cnt % chunk; if (layout == Layout::AoS) return cnt; if (imod) { int idiv = cnt / chunk; return (idiv + 1) * chunk; } return cnt; } /** Check for the pointer alignment. */ inline bool is_aligned(void* pointer, std::size_t alignment) { return (reinterpret_cast(pointer) % alignment) == 0; } /** * Allocate aligned memory. This will be unified memory if the corresponding * CMake option is set. This must be freed with the free_memory method. * * \param size Size of buffer to allocate in bytes. 
* \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment. */ inline void* emalloc_align(size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) { void* memptr; alloc_memory(memptr, size, alignment); if (alignment != 0) { nrn_assert(is_aligned(memptr, alignment)); } return memptr; } /** * Allocate the aligned memory and set it to 0. This will be unified memory if * the corresponding CMake option is set. This must be freed with the * free_memory method. * * \param n Number of objects to allocate * \param size Size of buffer for each object to allocate in bytes. * \param alignment Memory alignment, defaults to NRN_SOA_BYTE_ALIGN. Pass 0 for no alignment. * * \note the allocated size will be \code n*size */ inline void* ecalloc_align(size_t n, size_t size, size_t alignment = NRN_SOA_BYTE_ALIGN) { void* p; if (n == 0) { return nullptr; } calloc_memory(p, n * size, alignment); if (alignment != 0) { nrn_assert(is_aligned(p, alignment)); } return p; } } // namespace coreneuron ================================================ FILE: coreneuron/utils/memory_utils.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ /** * @file memory_utils.cpp * @date 25th Oct 2014 * * @brief Provides functionality to report current memory usage * of the simulator using interface provided by malloc.h * * Memory utilisation report is based on the use of mallinfo * interface defined in malloc.h. For 64 bit platform, this * is not portable and hence it will be replaced with new * glibc implementation of malloc_info. 
* * @see http://man7.org/linux/man-pages/man3/malloc_info.3.html */ #include #include #include #include "coreneuron/utils/memory_utils.h" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" #if defined(__APPLE__) && defined(__MACH__) #include #elif defined HAVE_MALLOC_H #include #endif #ifdef CORENEURON_ENABLE_GPU #include "cuda_profiler_api.h" #endif namespace coreneuron { double nrn_mallinfo(void) { // -ve mem usage for non-supported platforms double mbs = -1.0; // on os x returns the current resident set size (physical memory in use) #if defined(__APPLE__) && defined(__MACH__) struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t) &info, &infoCount) != KERN_SUCCESS) return (size_t) 0L; /* Can't access? */ return info.resident_size / (1024.0 * 1024.0); #elif defined(MINGW) mbs = -1; #else std::ifstream file("/proc/self/statm"); if (file.is_open()) { unsigned long long int data_size; file >> data_size >> data_size; file.close(); mbs = (data_size * sysconf(_SC_PAGESIZE)) / (1024.0 * 1024.0); } else { #if defined HAVE_MALLOC_H // The mallinfo2() function was added in glibc 2.33 #if defined(__GLIBC__) && (__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 33) struct mallinfo2 m = mallinfo2(); #else struct mallinfo m = mallinfo(); #endif mbs = (m.hblkhd + m.uordblks) / (1024.0 * 1024.0); #endif } #endif return mbs; } void report_mem_usage(const char* message, bool all_ranks) { double mem_max, mem_min, mem_avg; // min, max, avg memory // current memory usage on this rank double cur_mem = nrn_mallinfo(); /* @todo: avoid three all reduce class */ #if NRNMPI if (corenrn_param.mpi_enable) { mem_avg = nrnmpi_dbl_allreduce(cur_mem, 1) / nrnmpi_numprocs; mem_max = nrnmpi_dbl_allreduce(cur_mem, 2); mem_min = nrnmpi_dbl_allreduce(cur_mem, 3); } else #endif { mem_avg = mem_max = mem_min = cur_mem; } // all 
ranks prints information if all_ranks is true if (all_ranks) { printf(" Memory (MBs) (Rank : %2d) : %30s : Cur %.4lf, Max %.4lf, Min %.4lf, Avg %.4lf \n", nrnmpi_myid, message, cur_mem, mem_max, mem_min, mem_avg); } else if (nrnmpi_myid == 0) { printf(" Memory (MBs) : %25s : Max %.4lf, Min %.4lf, Avg %.4lf \n", message, mem_max, mem_min, mem_avg); #ifdef CORENEURON_ENABLE_GPU if (corenrn_param.gpu) { size_t free_byte, total_byte; cudaError_t cuda_status = cudaMemGetInfo(&free_byte, &total_byte); if (cudaSuccess != cuda_status) { std::printf("cudaMemGetInfo failed: %s\n", cudaGetErrorString(cuda_status)); } constexpr double MiB{1. / (1024. * 1024.)}; std::printf(" GPU Memory (MiBs) : Used = %f, Free = %f, Total = %f\n", (total_byte - free_byte) * MiB, free_byte * MiB, total_byte * MiB); } #endif } fflush(stdout); } } // namespace coreneuron ================================================ FILE: coreneuron/utils/memory_utils.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ /** * @file memory_utils.h * @date 25th Oct 2014 * @brief Function prototypes for the functions providing * information about simulator memory usage * */ #pragma once namespace coreneuron { /** @brief Reports current memory usage of the simulator to stdout * * Current implementation is based on mallinfo. 
This routine prints * min, max and avg memory usage across mpi comm world * @param message string indicating current stage of the simulation * @param all_ranks indicate whether to print info from all ranks * @return Void */ void report_mem_usage(const char* message, bool all_ranks = false); /** @brief Returns current memory usage in MBs * @param Void * @return memory usage in MBs */ double nrn_mallinfo(void); } // namespace coreneuron ================================================ FILE: coreneuron/utils/nrn_assert.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include #include #include /* Preserving original behaviour requires that we abort() on * parse failures. * * Relying on assert() (as in the original code) is fragile, * as this becomes a NOP if the source is compiled with * NDEBUG defined. */ /** Emit formatted message to stderr, then abort(). */ static void abortf(const char* fmt, ...) { va_list va; va_start(va, fmt); vfprintf(stderr, fmt, va); va_end(va); abort(); } /** assert()-like macro, independent of NDEBUG status */ #define nrn_assert(x) \ ((x) || (abortf("%s:%d: Assertion '%s' failed.\n", __FILE__, __LINE__, #x), 0)) ================================================ FILE: coreneuron/utils/nrn_stats.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================.
*/ /** * @file nrn_stats.cpp * @date 25th Dec 2014 * @brief Function declarations for the cell statistics * */ #include #include #include #include #include "coreneuron/utils/nrn_stats.h" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/network/partrans.hpp" #include "coreneuron/io/output_spikes.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" namespace coreneuron { const int NUM_STATS = 13; void report_cell_stats() { long stat_array[NUM_STATS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; for (int ith = 0; ith < nrn_nthread; ++ith) { stat_array[0] += nrn_threads[ith].ncell; // number of cells stat_array[10] += nrn_threads[ith].end; // number of compartments stat_array[1] += nrn_threads[ith].n_presyn; // number of presyns stat_array[2] += nrn_threads[ith].n_input_presyn; // number of input presyns stat_array[3] += nrn_threads[ith].n_netcon; // number of netcons, synapses stat_array[4] += nrn_threads[ith].n_pntproc; // number of point processes if (nrn_partrans::transfer_thread_data_) { size_t n = nrn_partrans::transfer_thread_data_[ith].tar_indices.size(); stat_array[11] += n; // number of transfer targets n = nrn_partrans::transfer_thread_data_[ith].src_indices.size(); stat_array[12] += n; // number of transfer sources } } stat_array[5] = spikevec_gid.size(); // number of spikes stat_array[6] = std::count_if(spikevec_gid.cbegin(), spikevec_gid.cend(), [](const int& s) { return s > -1; }); // number of non-negative gid spikes #if NRNMPI long gstat_array[NUM_STATS]; if (corenrn_param.mpi_enable) { nrnmpi_long_allreduce_vec(stat_array, gstat_array, NUM_STATS, 1); } else { assert(sizeof(stat_array) == sizeof(gstat_array)); std::memcpy(gstat_array, stat_array, sizeof(stat_array)); } #else const long(&gstat_array)[NUM_STATS] = stat_array; #endif if (nrnmpi_myid == 0) { printf("\n\n Simulation Statistics\n"); printf(" Number of cells: %ld\n", gstat_array[0]); printf(" Number of 
compartments: %ld\n", gstat_array[10]); printf(" Number of presyns: %ld\n", gstat_array[1]); printf(" Number of input presyns: %ld\n", gstat_array[2]); printf(" Number of synapses: %ld\n", gstat_array[3]); printf(" Number of point processes: %ld\n", gstat_array[4]); printf(" Number of transfer sources: %ld\n", gstat_array[12]); printf(" Number of transfer targets: %ld\n", gstat_array[11]); printf(" Number of spikes: %ld\n", gstat_array[5]); printf(" Number of spikes with non negative gid-s: %ld\n", gstat_array[6]); } } } // namespace coreneuron ================================================ FILE: coreneuron/utils/nrn_stats.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ /** * @file nrn_stats.h * @date 25th Dec 2014 * @brief Function declarations for the cell statistics * */ #pragma once namespace coreneuron { /** @brief Reports global cell statistics of the simulation * * This routine prints the global number of cells, synapses of the simulation * @param void * @return void */ void report_cell_stats(); } // namespace coreneuron ================================================ FILE: coreneuron/utils/nrnmutdec.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #if defined(_OPENMP) #include // This class respects the requirement *Mutex* class OMP_Mutex { public: // Default constructible OMP_Mutex() { omp_init_lock(&mut_); } // Destructible ~OMP_Mutex() { omp_destroy_lock(&mut_); } // Not copyable OMP_Mutex(const OMP_Mutex&) = delete; OMP_Mutex& operator=(const OMP_Mutex&) = delete; // Not movable OMP_Mutex(const OMP_Mutex&&) = delete; OMP_Mutex& operator=(const OMP_Mutex&&) = delete; // Basic Lockable void lock() { omp_set_lock(&mut_); } void unlock() { omp_unset_lock(&mut_); } // Lockable bool try_lock() { return omp_test_lock(&mut_) != 0; } private: omp_lock_t mut_; }; #else // This class respects the requirement *Mutex* class OMP_Mutex { public: // Default constructible OMP_Mutex() = default; // Destructible ~OMP_Mutex() = default; // Not copyable OMP_Mutex(const OMP_Mutex&) = delete; OMP_Mutex& operator=(const OMP_Mutex&) = delete; // Not movable OMP_Mutex(const OMP_Mutex&&) = delete; OMP_Mutex& operator=(const OMP_Mutex&&) = delete; // Basic Lockable void lock() {} void unlock() {} // Lockable bool try_lock() { return true; } }; #endif ================================================ FILE: coreneuron/utils/nrnoc_aux.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/coreneuron.hpp" #include "coreneuron/utils/nrnoc_aux.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" namespace coreneuron { bool stoprun; int v_structure_change; int diam_changed; #define MAXERRCOUNT 5 int hoc_errno_count; const char* bbcore_write_version = "1.6"; // Allow multiple gid and PreSyn per real cell. 
char* pnt_name(Point_process* pnt) { return corenrn.get_memb_func(pnt->_type).sym; } void nrn_exit(int err) { #if NRNMPI if (corenrn_param.mpi_enable) { nrnmpi_finalize(); } #endif exit(err); } void hoc_execerror(const char* s1, const char* s2) { printf("error: %s %s\n", s1, s2 ? s2 : ""); abort(); } void hoc_warning(const char* s1, const char* s2) { printf("warning: %s %s\n", s1, s2 ? s2 : ""); } double* makevector(size_t size) { return (double*) ecalloc(size, sizeof(char)); } void freevector(double* p) { if (p) { free(p); } } double** makematrix(size_t nrows, size_t ncols) { double** matrix = (double**) emalloc(nrows * sizeof(double*)); *matrix = (double*) emalloc(nrows * ncols * sizeof(double)); for (size_t i = 1; i < nrows; i++) matrix[i] = matrix[i - 1] + ncols; return (matrix); } void freematrix(double** matrix) { if (matrix != nullptr) { free(*matrix); free(matrix); } } void* emalloc(size_t size) { void* memptr = malloc(size); assert(memptr); return memptr; } /* some user mod files may use this in VERBATIM */ void* hoc_Emalloc(size_t size) { return emalloc(size); } void hoc_malchk(void) {} void* ecalloc(size_t n, size_t size) { if (n == 0) { return nullptr; } void* p = calloc(n, size); assert(p); return p; } void* erealloc(void* ptr, size_t size) { if (!ptr) { return emalloc(size); } void* p = realloc(ptr, size); assert(p); return p; } void* nrn_cacheline_alloc(void** memptr, size_t size) { alloc_memory(*memptr, size, 64); return *memptr; } /* used by nmodl and other c, c++ code */ double hoc_Exp(double x) { if (x < -700.) 
{ return 0.; } else if (x > 700) { errno = ERANGE; if (++hoc_errno_count < MAXERRCOUNT) { fprintf(stderr, "exp(%g) out of range, returning exp(700)\n", x); } if (hoc_errno_count == MAXERRCOUNT) { fprintf(stderr, "No more errno warnings during this execution\n"); } return exp(700.); } return exp(x); } /* check for version bbcore_write version between NEURON and CoreNEURON * abort in case of missmatch */ void check_bbcore_write_version(const char* version) { if (strcmp(version, bbcore_write_version) != 0) { if (nrnmpi_myid == 0) fprintf(stderr, "Error: Incompatible binary input dataset version (expected %s, input %s)\n", bbcore_write_version, version); abort(); } } } // namespace coreneuron ================================================ FILE: coreneuron/utils/nrnoc_aux.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include #include "coreneuron/mechanism/mechanism.hpp" namespace coreneuron { extern int v_structure_change; extern int diam_changed; extern int structure_change_cnt; extern char* pnt_name(Point_process* pnt); extern void nrn_exit(int); extern void* emalloc(size_t size); extern void* ecalloc(size_t n, size_t size); extern void* erealloc(void* ptr, size_t size); extern double* makevector(size_t size); /* size in bytes */ extern double** makematrix(size_t nrow, size_t ncol); void freevector(double*); void freematrix(double**); extern void hoc_execerror(const char*, const char*); /* print and abort */ extern void hoc_warning(const char*, const char*); extern double hoc_Exp(double x); } // namespace coreneuron ================================================ FILE: coreneuron/utils/nrntimeout.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/utils/utils.hpp" #if NRNMPI #include #include /* if you are using any sampling based profiling tool, setitimer will conflict with profiler. 
In that case, user can disable setitimer which is just safety for deadlock situations */ namespace coreneuron { #if (defined(DISABLE_TIMEOUT) || defined(MINGW)) void nrn_timeout(int seconds) {} #else void (*nrntimeout_call)(); static double told; static struct itimerval value; static struct sigaction act, oact; static void timed_out(int sig) { (void) sig; /* unused */ #if CORENRN_DEBUG printf("timed_out told=%g t=%g\n", told, t); #endif if (nrn_threads->_t == told) { /* nothing has been accomplished since last signal*/ printf("nrn_timeout t=%g\n", nrn_threads->_t); if (nrntimeout_call) { (*nrntimeout_call)(); } nrn_abort(0); } told = nrn_threads->_t; } void nrn_timeout(int seconds) { if (nrnmpi_myid != 0) { return; } #if CORENRN_DEBUG printf("nrn_timeout %d\n", seconds); #endif if (seconds) { told = nrn_threads->_t; act.sa_handler = timed_out; act.sa_flags = SA_RESTART; if (sigaction(SIGALRM, &act, &oact)) { printf("sigaction failed\n"); nrn_abort(0); } } else { sigaction(SIGALRM, &oact, (struct sigaction*) 0); } value.it_interval.tv_sec = seconds; value.it_interval.tv_usec = 0; value.it_value.tv_sec = seconds; value.it_value.tv_usec = 0; if (setitimer(ITIMER_REAL, &value, (struct itimerval*) 0)) { printf("setitimer failed\n"); nrn_abort(0); } } #endif /* DISABLE_TIMEOUT */ } // namespace coreneuron #endif /*NRNMPI*/ ================================================ FILE: coreneuron/utils/offload.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
// Homegrown replacement for acc_deviceptr() on buggy NVHPC versions (<= 22.3), see
// https://forums.developer.nvidia.com/t/acc-deviceptr-does-not-work-in-openacc-code-dynamically-loaded-from-a-shared-library/211599
//
// The version test must be lexicographic on (major, minor). Testing the two
// components independently (major <= 22 && minor <= 3) wrongly excludes
// releases such as 21.9 or 21.11, whose minor number is larger than 3 even
// though they predate 22.3.
#if defined(CORENEURON_ENABLE_GPU) && !defined(CORENEURON_UNIFIED_MEMORY) && \
    defined(__NVCOMPILER_MAJOR__) && defined(__NVCOMPILER_MINOR__) &&        \
    ((__NVCOMPILER_MAJOR__ < 22) ||                                          \
     ((__NVCOMPILER_MAJOR__ == 22) && (__NVCOMPILER_MINOR__ <= 3)))
#define CORENEURON_ENABLE_PRESENT_TABLE
/// Look up the device pointer recorded for `h_ptr`.
/// The first member of the result is used as the device pointer and the second
/// as an error flag by cnrn_target_deviceptr_or_present().
std::pair<void*, bool> cnrn_target_deviceptr_impl(bool must_be_present_or_null, void const* h_ptr);
/// Record a host -> device mapping after a copyin; callers pass `len` in bytes.
void cnrn_target_copyin_update_present_table(void const* h_ptr, void* d_ptr, std::size_t len);
/// Drop a host -> device mapping before a delete; callers pass `len` in bytes.
void cnrn_target_delete_update_present_table(void const* h_ptr, std::size_t len);
#endif
/** @brief Refresh the device copy of a host array that is already on the device.
 *
 *  Resolves the device pointer that corresponds to `h_ptr` (with
 *  `must_be_present_or_null` set to true) and copies `len` elements of type
 *  `T` from the host to that device location.
 *
 *  @param file  Source file of the call site, forwarded to the debug hooks.
 *  @param line  Source line of the call site, forwarded to the debug hooks.
 *  @param h_ptr Host pointer to the first element to transfer.
 *  @param len   Number of elements (not bytes) to transfer; defaults to 1.
 */
template <typename T>
void cnrn_target_update_on_device(std::string_view file,
                                  int line,
                                  const T* h_ptr,
                                  std::size_t len = 1) {
    auto* d_ptr = cnrn_target_deviceptr_or_present(file, line, true, h_ptr);
    // Forward `len`: it used to be dropped here, which made the copy fall back
    // to the default of a single element regardless of what the caller asked for.
    cnrn_target_memcpy_to_device(file, line, d_ptr, h_ptr, len);
}
/*! \class Instrumentor
 * \brief Instrumentation infrastructure for benchmarking and profiling.
 *
 * Compile-time dispatcher: every static method below calls the method of the
 * same name on each profiler type listed in `TProfilerImpl`, in the order in
 * which the types are listed. Profilers are expected to leave the hooks that
 * do not apply to them empty.
 */
template <class... TProfilerImpl>
struct Instrumentor {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-value"
    /*! \fn phase_begin
     * \brief Mark the beginning of a named code region to be profiled.
     *
     * Forwarded to the `phase_begin` hook of every enabled profiler. Only
     * profilers that can track several, differently named regions at the same
     * time should provide a non-empty hook.
     *
     * @param name the (unique) identifier of the code region to be profiled
     */
    inline static void phase_begin(const char* name) {
        (TProfilerImpl::phase_begin(name), ...);
    }

    /*! \fn phase_end
     * \brief Mark the end of a named code region to be profiled.
     *
     * Forwarded to the `phase_end` hook of every enabled profiler. Only
     * profilers that can track several, differently named regions at the same
     * time should provide a non-empty hook.
     *
     * @param name the (unique) identifier of the code region to be profiled
     */
    inline static void phase_end(const char* name) {
        (TProfilerImpl::phase_end(name), ...);
    }

    /*! \fn start_profile
     * \brief Globally switch on the collection of profiling data.
     *
     * Unlike `phase_begin`, no named region of interest is defined. Forwarded
     * to the `start_profile` hook of every enabled profiler. Only profilers
     * that expose a plain global begin/end interface should provide a
     * non-empty hook.
     */
    inline static void start_profile() {
        (TProfilerImpl::start_profile(), ...);
    }

    /*! \fn stop_profile
     * \brief Globally switch off the collection of profiling data.
     *
     * Unlike `phase_end`, no named region of interest is involved. Forwarded
     * to the `stop_profile` hook of every enabled profiler. Only profilers
     * that expose a plain global begin/end interface should provide a
     * non-empty hook.
     */
    inline static void stop_profile() {
        (TProfilerImpl::stop_profile(), ...);
    }

    /*! \fn init_profile
     * \brief Initialize the profilers.
     *
     * Sets up the internal structures of each profiler without starting any
     * data collection yet, similar in concept to MPI_Init. Forwarded to the
     * `init_profile` hook of every enabled profiler. Only profilers that need
     * special initialization, typically before any memory allocation is done,
     * should provide a non-empty hook.
     */
    inline static void init_profile() {
        (TProfilerImpl::init_profile(), ...);
    }

    /*! \fn finalize_profile
     * \brief Finalize the profilers.
     *
     * Tears down the internal structures of each profiler, similar in concept
     * to MPI_Finalize. Forwarded to the `finalize_profile` hook of every
     * enabled profiler. Only profilers that need special finalization should
     * provide a non-empty hook.
     */
    inline static void finalize_profile() {
        (TProfilerImpl::finalize_profile(), ...);
    }
#pragma clang diagnostic pop
};
finalize_profile() { LIKWID_MARKER_CLOSE; }; }; #endif struct NullInstrumentor { inline static void phase_begin(const char* name){}; inline static void phase_end(const char* name){}; inline static void start_profile(){}; inline static void stop_profile(){}; inline static void init_profile(){}; inline static void finalize_profile(){}; }; using InstrumentorImpl = detail::Instrumentor< #if defined CORENEURON_CALIPER detail::Caliper, #endif #ifdef CORENEURON_CUDA_PROFILING detail::CudaProfiling, #endif #if defined(CRAYPAT) detail::CrayPat, #endif #if defined(TAU) detail::Tau, #endif #if defined(LIKWID_PERFMON) detail::Likwid, #endif detail::NullInstrumentor>; } // namespace detail namespace Instrumentor { struct phase { const char* phase_name; phase(const char* name) : phase_name(name) { detail::InstrumentorImpl::phase_begin(phase_name); } ~phase() { detail::InstrumentorImpl::phase_end(phase_name); } }; inline static void start_profile() { detail::InstrumentorImpl::start_profile(); } inline static void stop_profile() { detail::InstrumentorImpl::stop_profile(); } inline static void phase_begin(const char* name) { detail::InstrumentorImpl::phase_begin(name); } inline static void phase_end(const char* name) { detail::InstrumentorImpl::phase_end(name); } inline static void init_profile() { detail::InstrumentorImpl::init_profile(); } inline static void finalize_profile() { detail::InstrumentorImpl::finalize_profile(); } } // namespace Instrumentor } // namespace coreneuron ================================================ FILE: coreneuron/utils/progressbar/progressbar.cpp ================================================ /** * \file * \author Trevor Fountain * \author Johannes Buchner * \author Erik Garrison * \date 2010-2014 * \copyright BSD 3-Clause * * progressbar -- a C class (by convention) for displaying progress * on the command line (to stdout). 
/// The format in which the simulation time and the estimated remaining time are reported
static const char* const ETA_FORMAT = "t: %-6.2f ETA:%2dh%02dm%02ds";
/// Screen width reserved for the text produced by ETA_FORMAT.
/// "t: " (3) + time field (8, enough for values below 100000) + " " (1) +
/// "ETA:" (4) + "HHhMMmSSs" (9, enough for less than 100 hours) = 25.
/// The previous value of 13 only accounted for the "ETA:..." part, so the bar
/// was sized too wide and the drawn line exceeded the assumed screen width.
enum { ETA_FORMAT_LENGTH = 25 };
BAR_DRAW_INTERVAL : BAR_DRAW_INTERVAL_NOTTY; new_bar->t = 0; new_bar->start = time(nullptr); assert(3 == strlen(format) && "format must be 3 characters in length"); new_bar->format.begin = format[0]; new_bar->format.fill = format[1]; new_bar->format.end = format[2]; progressbar_update_label(new_bar, label); progressbar_draw(new_bar); new_bar->prev_t = difftime(time(nullptr), new_bar->start); new_bar->drawn_count = 1; return new_bar; } /** * Create a new progress bar with the specified label and max number of steps. */ progressbar* progressbar_new(const char* label, unsigned long max) { return progressbar_new_with_format(label, max, "|=|"); } void progressbar_update_label(progressbar* bar, const char* label) { bar->label = label; } /** * Delete an existing progress bar. */ void progressbar_free(progressbar* bar) { free(bar); } /** * Increment an existing progressbar by `value` steps. * Additionally issues a redraw in case a certain time interval has elapsed (min: 1sec) * Reasons for a larger interval are: * - Stdout is not TTY * - Respect BAR_DRAW_COUNT_MAX */ void progressbar_update(progressbar* bar, unsigned long value, double t) { bar->value = value; bar->t = t; int sim_time = difftime(time(nullptr), bar->start); // If there is not enough time passed to redraw the progress bar return if ((sim_time - bar->prev_t) < bar->draw_time_interval) { return; } progressbar_draw(bar); bar->drawn_count++; bar->prev_t = sim_time; if (bar->drawn_count >= BAR_DRAW_COUNT_MAX || sim_time < 15) { // Dont change the interval after the limit. Simulation should be over any moment and // avoid the calc of draw_time_interval which could raise DIV/0 // Also, dont do it the first 15sec to avoid really bad estimates which could potentially // delay a better estimate too far away in the future. 
return; } // Sample ETA to calculate the next interval until the redraw of the progressbar int eta_s = progressbar_remaining_seconds(bar); bar->draw_time_interval = eta_s / (BAR_DRAW_COUNT_MAX - bar->drawn_count); if (bar->draw_time_interval < BAR_DRAW_INTERVAL_NOTTY) { bar->draw_time_interval = isatty(STDOUT_FILENO) ? ((bar->draw_time_interval < BAR_DRAW_INTERVAL) ? BAR_DRAW_INTERVAL : bar->draw_time_interval) : BAR_DRAW_INTERVAL_NOTTY; } } /** * Increment an existing progressbar by a single step. */ void progressbar_inc(progressbar* bar, double t) { progressbar_update(bar, bar->value + 1, t); } static void progressbar_write_char(FILE* file, const int ch, const size_t times) { for (std::size_t i = 0; i < times; ++i) { fputc(ch, file); } } static int progressbar_max(int x, int y) { return x > y ? x : y; } static unsigned int get_screen_width(void) { return DEFAULT_SCREEN_WIDTH; } static int progressbar_bar_width(int screen_width, int label_length) { return progressbar_max(MINIMUM_BAR_WIDTH, screen_width - label_length - ETA_FORMAT_LENGTH - WHITESPACE_LENGTH); } static int progressbar_label_width(int screen_width, int label_length, int bar_width) { int eta_width = ETA_FORMAT_LENGTH; // If the progressbar is too wide to fit on the screen, we must sacrifice the label. 
if (label_length + 1 + bar_width + 1 + ETA_FORMAT_LENGTH > screen_width) { return progressbar_max(0, screen_width - bar_width - eta_width - WHITESPACE_LENGTH); } else { return label_length; } } static int progressbar_remaining_seconds(const progressbar* bar) { double offset = difftime(time(nullptr), bar->start); if (bar->value > 0 && offset > 0) { return (offset / (double) bar->value) * (bar->max - bar->value); } else { return 0; } } static progressbar_time_components progressbar_calc_time_components(int seconds) { int hours = seconds / 3600; seconds -= hours * 3600; int minutes = seconds / 60; seconds -= minutes * 60; progressbar_time_components components = {hours, minutes, seconds}; return components; } static void progressbar_draw(const progressbar* bar) { int screen_width = get_screen_width(); int label_length = strlen(bar->label); int bar_width = progressbar_bar_width(screen_width, label_length); int label_width = progressbar_label_width(screen_width, label_length, bar_width); int progressbar_completed = (bar->value >= bar->max); int bar_piece_count = bar_width - BAR_BORDER_WIDTH; int bar_piece_current = (progressbar_completed) ? bar_piece_count : bar_piece_count * ((double) bar->value / bar->max); progressbar_time_components eta = (progressbar_completed) ? progressbar_calc_time_components(difftime(time(nullptr), bar->start)) : progressbar_calc_time_components(progressbar_remaining_seconds(bar)); if (label_width == 0) { // The label would usually have a trailing space, but in the case that we don't print // a label, the bar can use that space instead. 
/**
 * Progressbar data structure (do not modify or create directly).
 *
 * Instances are allocated with malloc() by progressbar_new_with_format() and
 * released with free() by progressbar_free(), so fields that the constructor
 * function does not assign start out uninitialized.
 */
struct progressbar {
    /// maximum value; the bar is drawn as complete once `value >= max`
    unsigned long max;
    /// current value
    unsigned long value;
    /// value of the previous progress bar drawn in output
    /// NOTE(review): not assigned or read anywhere in progressbar.cpp
    unsigned long prev_sample_value;
    /// minimum time between consecutive bar redraws (seconds)
    time_t draw_time_interval;
    /// number of times the bar has been drawn so far
    unsigned long drawn_count;
    /// wall-clock time at which the progressbar was created
    time_t start;
    /// seconds elapsed since `start` when the bar was last drawn
    /// (an offset, not an absolute time stamp)
    time_t prev_t;
    /// label; the pointer is stored without copying the string, so the string
    /// must outlive the progressbar
    const char* label;
    /// current time (added for simulation), printed in the "t:" field
    double t;
    /// characters for the beginning, filling and end of the
    /// progressbar. E.g. |###    | has |#|
    struct {
        char begin;
        char fill;
        char end;
    } format;
};
void progressbar_update(progressbar* bar, unsigned long value, double t); /// Set the label of the progressbar. Note that no rendering is done. The label is simply set so /// that the next rendering will use the new label. To immediately see the new label, call /// progressbar_draw. /// Does not update display or copy the label void progressbar_update_label(progressbar* bar, const char* label); /// Finalize (and free!) a progressbar. Call this when you're done, or if you break out /// partway through. void progressbar_finish(progressbar* bar); ================================================ FILE: coreneuron/utils/randoms/nrnran123.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/mpi/core/nrnmpi.hpp" #include "coreneuron/utils/memory.h" #include "coreneuron/utils/nrnmutdec.hpp" #include "coreneuron/utils/randoms/nrnran123.h" #ifdef CORENEURON_USE_BOOST_POOL #include #include #endif #include #include #include #include // Defining these attributes seems to help nvc++ in OpenMP target offload mode. #if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && \ defined(_OPENMP) && defined(__CUDACC__) #define CORENRN_HOST_DEVICE __host__ __device__ #else #define CORENRN_HOST_DEVICE #endif namespace { #ifdef CORENEURON_USE_BOOST_POOL /** Tag type for use with boost::fast_pool_allocator that forwards to * coreneuron::[de]allocate_unified(). Using a Random123-specific type here * makes sure that allocations do not come from the same global pool as other * usage of boost pools for objects with sizeof == sizeof(nrnran123_State). 
* * The messy m_block_sizes map is just because `deallocate_unified` uses sized * deallocations, but the Boost pool allocators don't. Because this is hidden * behind the pool mechanism, these methods are not called very often and the * overhead is minimal. */ struct random123_allocate_unified { using size_type = std::size_t; using difference_type = std::size_t; static char* malloc(const size_type bytes) { std::lock_guard const lock{m_mutex}; static_cast(lock); auto* buffer = coreneuron::allocate_unified(bytes); m_block_sizes[buffer] = bytes; return reinterpret_cast(buffer); } static void free(char* const block) { std::lock_guard const lock{m_mutex}; static_cast(lock); auto const iter = m_block_sizes.find(block); assert(iter != m_block_sizes.end()); auto const size = iter->second; m_block_sizes.erase(iter); return coreneuron::deallocate_unified(block, size); } static std::mutex m_mutex; static std::unordered_map m_block_sizes; }; std::mutex random123_allocate_unified::m_mutex{}; std::unordered_map random123_allocate_unified::m_block_sizes{}; using random123_allocator = boost::fast_pool_allocator; #else using random123_allocator = coreneuron::unified_allocator; #endif /* Global data structure per process. Using a unique_ptr here causes [minor] * problems because its destructor can be called very late during application * shutdown. If the destructor calls cudaFree and the CUDA runtime has already * been shut down then tools like cuda-memcheck reports errors. 
*/ OMP_Mutex g_instance_count_mutex; std::size_t g_instance_count{}; #ifdef __CUDACC__ #define g_k_qualifiers __device__ __constant__ #else #define g_k_qualifiers #endif g_k_qualifiers philox4x32_key_t g_k{}; // Cannot refer to g_k directly from a nrn_pragma_acc(routine seq) method like // coreneuron_random123_philox4x32_helper, and cannot have this inlined there at // higher optimisation levels __attribute__((noinline)) philox4x32_key_t& global_state() { return g_k; } } // namespace CORENRN_HOST_DEVICE philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s) { return philox4x32(s->c, global_state()); } namespace coreneuron { std::size_t nrnran123_instance_count() { return g_instance_count; } /* if one sets the global, one should reset all the stream sequences. */ uint32_t nrnran123_get_globalindex() { return global_state().v[0]; } /* nrn123 streams are created from cpu launcher routine */ void nrnran123_set_globalindex(uint32_t gix) { // If the global seed is changing then we shouldn't have any active streams. auto& g_k = global_state(); { std::lock_guard _{g_instance_count_mutex}; if (g_instance_count != 0 && nrnmpi_myid == 0) { std::cout << "nrnran123_set_globalindex(" << gix << ") called when a non-zero number of Random123 streams (" << g_instance_count << ") were active. 
This is not safe, some streams will remember the old value (" << g_k.v[0] << ')' << std::endl; } } if (g_k.v[0] != gix) { g_k.v[0] = gix; if (coreneuron::gpu_enabled()) { #ifdef __CUDACC__ { auto const code = cudaMemcpyToSymbol(g_k, &g_k, sizeof(g_k)); assert(code == cudaSuccess); } { auto const code = cudaDeviceSynchronize(); assert(code == cudaSuccess); } #else nrn_pragma_acc(update device(g_k)) nrn_pragma_omp(target update to(g_k)) #endif } } } void nrnran123_initialise_global_state_on_device() { if (coreneuron::gpu_enabled()) { #ifndef __CUDACC__ nrn_pragma_acc(enter data copyin(g_k)) #endif } } void nrnran123_destroy_global_state_on_device() { if (coreneuron::gpu_enabled()) { #ifndef __CUDACC__ nrn_pragma_acc(exit data delete (g_k)) #endif } } /** @brief Allocate a new Random123 stream. * @todo It would be nicer if the API return type was * std::unique_ptr, so we could use a * custom allocator/deleter and avoid the (fragile) need for matching * nrnran123_deletestream calls. */ nrnran123_State* nrnran123_newstream3(uint32_t id1, uint32_t id2, uint32_t id3, bool use_unified_memory) { // The `use_unified_memory` argument is an implementation detail to keep the // old behaviour that some Random123 streams that are known to only be used // from the CPU are allocated using new/delete instead of unified memory. // See OPENACC_EXCLUDED_FILES in coreneuron/CMakeLists.txt. If we dropped // this feature then we could always use coreneuron::unified_allocator. 
#ifndef CORENEURON_ENABLE_GPU if (use_unified_memory) { throw std::runtime_error("Tried to use CUDA unified memory in a non-GPU build."); } #endif nrnran123_State* s{nullptr}; if (use_unified_memory) { s = coreneuron::allocate_unique(random123_allocator{}).release(); } else { s = new nrnran123_State{}; } s->c.v[0] = 0; s->c.v[1] = id3; s->c.v[2] = id1; s->c.v[3] = id2; nrnran123_setseq(s, 0, 0); { std::lock_guard _{g_instance_count_mutex}; ++g_instance_count; } return s; } /* nrn123 streams are destroyed from cpu launcher routine */ void nrnran123_deletestream(nrnran123_State* s, bool use_unified_memory) { #ifndef CORENEURON_ENABLE_GPU if (use_unified_memory) { throw std::runtime_error("Tried to use CUDA unified memory in a non-GPU build."); } #endif { std::lock_guard _{g_instance_count_mutex}; --g_instance_count; } if (use_unified_memory) { std::unique_ptr> _{s}; } else { delete s; } } } // namespace coreneuron ================================================ FILE: coreneuron/utils/randoms/nrnran123.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once /* interface to Random123 */ /* http://www.thesalmons.org/john/random123/papers/random123sc11.pdf */ /* The 4x32 generators utilize a uint32x4 counter and uint32x4 key to transform into an almost cryptographic quality uint32x4 random result. There are many possibilites for balancing the sharing of the internal state instances while reserving a uint32 counter for the stream sequence and reserving other portions of the counter vector for stream identifiers and global index used by all streams. 
We currently provide a single instance by default in which the policy is to use the 0th counter uint32 as the stream sequence, words 2 and 3 as the stream identifier, and word 0 of the key as the global index. Unused words are constant uint32 0. It is also possible to use Random123 directly without reference to this interface. See Random123-1.02/docs/html/index.html of the full distribution available from http://www.deshawresearch.com/resources_random123.html */ #ifdef __bgclang__ #define R123_USE_MULHILO64_MULHI_INTRIN 0 #define R123_USE_GNU_UINT128 1 #endif #include "coreneuron/utils/offload.hpp" #include #include #include // Some files are compiled with DISABLE_OPENACC, and some builds have no GPU // support at all. In these two cases, request that the random123 state is // allocated using new/delete instead of CUDA unified memory. #if defined(CORENEURON_ENABLE_GPU) && !defined(DISABLE_OPENACC) #define CORENRN_RAN123_USE_UNIFIED_MEMORY true #else #define CORENRN_RAN123_USE_UNIFIED_MEMORY false #endif namespace coreneuron { struct nrnran123_State { philox4x32_ctr_t c; philox4x32_ctr_t r; char which_; }; } // namespace coreneuron /** @brief Provide a helper function in global namespace that is declared target for OpenMP * offloading to function correctly with NVHPC */ nrn_pragma_acc(routine seq) nrn_pragma_omp(declare target) philox4x32_ctr_t coreneuron_random123_philox4x32_helper(coreneuron::nrnran123_State* s); nrn_pragma_omp(end declare target) namespace coreneuron { void nrnran123_initialise_global_state_on_device(); void nrnran123_destroy_global_state_on_device(); /* global index. eg. run number */ /* all generator instances share this global index */ void nrnran123_set_globalindex(uint32_t gix); uint32_t nrnran123_get_globalindex(); // Utilities used for calculating model size, only called from the CPU. 
std::size_t nrnran123_instance_count(); inline std::size_t nrnran123_state_size() { return sizeof(nrnran123_State); } /* routines for creating and deleting streams are called from cpu */ nrnran123_State* nrnran123_newstream3(uint32_t id1, uint32_t id2, uint32_t id3, bool use_unified_memory = CORENRN_RAN123_USE_UNIFIED_MEMORY); inline nrnran123_State* nrnran123_newstream( uint32_t id1, uint32_t id2, bool use_unified_memory = CORENRN_RAN123_USE_UNIFIED_MEMORY) { return nrnran123_newstream3(id1, id2, 0, use_unified_memory); } void nrnran123_deletestream(nrnran123_State* s, bool use_unified_memory = CORENRN_RAN123_USE_UNIFIED_MEMORY); /* minimal data stream */ constexpr void nrnran123_getseq(nrnran123_State* s, uint32_t* seq, char* which) { *seq = s->c.v[0]; *which = s->which_; } constexpr void nrnran123_getids(nrnran123_State* s, uint32_t* id1, uint32_t* id2) { *id1 = s->c.v[2]; *id2 = s->c.v[3]; } constexpr void nrnran123_getids3(nrnran123_State* s, uint32_t* id1, uint32_t* id2, uint32_t* id3) { *id3 = s->c.v[1]; *id1 = s->c.v[2]; *id2 = s->c.v[3]; } // Uniform 0 to 2*32-1 inline uint32_t nrnran123_ipick(nrnran123_State* s) { char which = s->which_; uint32_t rval{s->r.v[int{which++}]}; if (which > 3) { which = 0; s->c.v[0]++; s->r = coreneuron_random123_philox4x32_helper(s); } s->which_ = which; return rval; } constexpr double nrnran123_uint2dbl(uint32_t u) { constexpr double SHIFT32 = 1.0 / 4294967297.0; /* 1/(2^32 + 1) */ /* 0 to 2^32-1 transforms to double value in open (0,1) interval */ /* min 2.3283064e-10 to max (1 - 2.3283064e-10) */ return (static_cast(u) + 1.0) * SHIFT32; } // Uniform open interval (0,1), minimum value is 2.3283064e-10 and max value is 1-min inline double nrnran123_dblpick(nrnran123_State* s) { return nrnran123_uint2dbl(nrnran123_ipick(s)); } /* this could be called from openacc parallel construct (in INITIAL block) */ inline void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char which) { if (which > 3) { s->which_ = 0; } else { 
s->which_ = which; } s->c.v[0] = seq; s->r = coreneuron_random123_philox4x32_helper(s); } // nrnran123_negexp min value is 2.3283064e-10, max is 22.18071, mean 1.0 inline double nrnran123_negexp(nrnran123_State* s) { return -std::log(nrnran123_dblpick(s)); } /* at cost of a cached value we could compute two at a time. */ inline double nrnran123_normal(nrnran123_State* s) { double w, u1; do { u1 = nrnran123_dblpick(s); double u2{nrnran123_dblpick(s)}; u1 = 2. * u1 - 1.; u2 = 2. * u2 - 1.; w = (u1 * u1) + (u2 * u2); } while (w > 1); double y{std::sqrt((-2. * std::log(w)) / w)}; return u1 * y; } // nrnran123_gauss, nrnran123_iran were declared but not defined in CoreNEURON // nrnran123_array4x32 was declared but not used in CoreNEURON } // namespace coreneuron ================================================ FILE: coreneuron/utils/string_utils.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #include unsigned strcat_at_pos(char* dest, unsigned start_position, char* src, unsigned src_length) { memcpy(dest + start_position, src, src_length); dest[start_position + src_length] = '\0'; return start_position + src_length; } ================================================ FILE: coreneuron/utils/string_utils.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ /** * @file string_utils.h * @brief Utility functions for strings * */ #pragma once /** @brief Appends a copy of the source string to the destination string. 
* * A null-character is included at the end of the new string formed by the concatenation of both in * destination. It has similar behavior to strcat but better performance in case that it is needed * to append a char array to another very large char array. * * @param dest Destination string * @param start_position Position of dest to start writing src * @param src Source string * @param src_length Length of src to append to dest * @return Position of the final character of dest after appending src (including the null * terminating character) */ unsigned strcat_at_pos(char* dest, unsigned start_position, char* src, unsigned src_length); ================================================ FILE: coreneuron/utils/units.hpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= */ #pragma once namespace coreneuron { namespace units { #if CORENEURON_USE_LEGACY_UNITS == 1 constexpr double faraday{96485.309}; constexpr double gasconstant{8.3134}; #else /* NMODL translated MOD files get unit constants typically from * share/lib/nrnunits.lib.in. But there were other source files that hardcode * some of the constants. Here we gather a few modern units into a single place * (but, unfortunately, also in nrnunits.lib.in). Legacy units cannot be * gathered here because they can differ slightly from place to place. * * These come from https://physics.nist.gov/cuu/Constants/index.html. * Termed the "2018 CODATA recommended values", they became available * on 20 May 2019 and replace the 2014 CODATA set. 
* * See oc/hoc_init.c, nrnoc/eion.c, nrniv/kschan.h */ namespace detail { constexpr double electron_charge{1.602176634e-19}; // coulomb exact constexpr double avogadro_number{6.02214076e+23}; // exact constexpr double boltzmann{1.380649e-23}; // joule/K exact } // namespace detail constexpr double faraday{detail::electron_charge * detail::avogadro_number}; // 96485.33212... // coulomb/mol constexpr double gasconstant{detail::boltzmann * detail::avogadro_number}; // 8.314462618... // joule/mol-K #endif } // namespace units } // namespace coreneuron ================================================ FILE: coreneuron/utils/utils.cpp ================================================ /* # ============================================================================= # Copyright (c) 2021-22 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include "utils.hpp" #include "coreneuron/apps/corenrn_parameters.hpp" namespace coreneuron { [[noreturn]] void nrn_abort(int errcode) { #if NRNMPI if (corenrn_param.mpi_enable && nrnmpi_initialized()) { nrnmpi_abort(errcode); } #endif std::abort(); } double nrn_wtime() { #if NRNMPI if (corenrn_param.mpi_enable) { return nrnmpi_wtime(); } else #endif { struct timeval time1; gettimeofday(&time1, nullptr); return (time1.tv_sec + time1.tv_usec / 1.e6); } } } // namespace coreneuron ================================================ FILE: coreneuron/utils/utils.hpp ================================================ /* # ============================================================================= # Copyright (c) 2021-22 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include #include "coreneuron/mpi/nrnmpi.h" #include "coreneuron/mpi/core/nrnmpi.hpp" namespace coreneuron { [[noreturn]] void nrn_abort(int errcode); template void nrn_fatal_error(const char* msg, Args&&... args) { if (nrnmpi_myid == 0) { printf(msg, std::forward(args)...); } nrn_abort(-1); } extern double nrn_wtime(void); } // namespace coreneuron ================================================ FILE: coreneuron/utils/utils_cuda.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #pragma once #include #include // From Random123 lib #define CHECKLAST(MSG) \ do { \ cudaError_t e = cudaGetLastError(); \ if (e != cudaSuccess) { \ fprintf(stderr, \ "%s:%d: CUDA Error: %s: %s\n", \ __FILE__, \ __LINE__, \ (MSG), \ cudaGetErrorString(e)); \ exit(1); \ } \ } while (0) #define CHECKCALL(RET) \ do { \ cudaError_t e = (RET); \ if (e != cudaSuccess) { \ fprintf(stderr, "%s:%d: CUDA Error: %s\n", __FILE__, __LINE__, cudaGetErrorString(e)); \ exit(1); \ } \ } while (0) ================================================ FILE: coreneuron/utils/vrecitem.h ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/ #pragma once #include "coreneuron/network/netcon.hpp" #include "coreneuron/utils/ivocvect.hpp" namespace coreneuron { class PlayRecord; #define PlayRecordType 0 #define VecPlayContinuousType 4 #define PlayRecordEventType 21 // used by PlayRecord subclasses that utilize discrete events class PlayRecordEvent: public DiscreteEvent { public: PlayRecordEvent() = default; virtual ~PlayRecordEvent() = default; virtual void deliver(double, NetCvode*, NrnThread*) override; virtual void pr(const char*, double t, NetCvode*) override; virtual NrnThread* thread(); PlayRecord* plr_; static unsigned long playrecord_send_; static unsigned long playrecord_deliver_; virtual int type() const override { return PlayRecordEventType; } }; // common interface for Play and Record for all integration methods. class PlayRecord { public: PlayRecord(double* pd, int ith); virtual ~PlayRecord() = default; virtual void play_init() {} // called near beginning of finitialize virtual void continuous(double) { } // play - every f(y, t) or res(y', y, t); record - advance_tn and initialize flag virtual void deliver(double, NetCvode*) {} // at associated DiscreteEvent virtual PlayRecordEvent* event() { return nullptr; } virtual void pr(); // print identifying info virtual int type() const { return PlayRecordType; } double* pd_; int ith_; // The thread index }; class VecPlayContinuous: public PlayRecord { public: VecPlayContinuous(double*, IvocVect&& yvec, IvocVect&& tvec, IvocVect* discon, int ith); virtual ~VecPlayContinuous(); virtual void play_init() override; virtual void deliver(double tt, NetCvode*) override; virtual PlayRecordEvent* event() override { return e_; } virtual void pr() override; void continuous(double tt) override; double interpolate(double tt); double interp(double th, double x0, double x1) { return x0 + (x1 - x0) * th; } void search(double tt); virtual int type() const override { return VecPlayContinuousType; } IvocVect y_; IvocVect t_; IvocVect* discon_indices_; std::size_t 
last_index_{}; std::size_t discon_index_{}; std::size_t ubound_index_{}; PlayRecordEvent* e_ = nullptr; // Need to be a raw pointer for acc }; } // namespace coreneuron ================================================ FILE: coreneuron/utils/vrecord.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include #include "coreneuron/nrnconf.h" #include "coreneuron/sim/multicore.hpp" #include "coreneuron/utils/ivocvect.hpp" #include "coreneuron/network/netcvode.hpp" #include "coreneuron/utils/vrecitem.h" namespace coreneuron { extern NetCvode* net_cvode_instance; void PlayRecordEvent::deliver(double tt, NetCvode* ns, NrnThread*) { plr_->deliver(tt, ns); } NrnThread* PlayRecordEvent::thread() { return nrn_threads + plr_->ith_; } void PlayRecordEvent::pr(const char* s, double tt, NetCvode*) { printf("%s PlayRecordEvent %.15g ", s, tt); plr_->pr(); } PlayRecord::PlayRecord(double* pd, int ith) : pd_(pd) , ith_(ith) {} void PlayRecord::pr() { printf("PlayRecord\n"); } VecPlayContinuous::VecPlayContinuous(double* pd, IvocVect&& yvec, IvocVect&& tvec, IvocVect* discon, int ith) : PlayRecord(pd, ith) , y_(std::move(yvec)) , t_(std::move(tvec)) , discon_indices_(discon) , e_(new PlayRecordEvent{}) { e_->plr_ = this; } VecPlayContinuous::~VecPlayContinuous() { delete e_; } void VecPlayContinuous::play_init() { NrnThread* nt = nrn_threads + ith_; last_index_ = 0; discon_index_ = 0; if (discon_indices_) { if (discon_indices_->size() > 0) { ubound_index_ = (int) (*discon_indices_)[discon_index_++]; // printf("play_init %d %g\n", ubound_index_, t_->elem(ubound_index_)); e_->send(t_[ubound_index_], net_cvode_instance, nt); } else { ubound_index_ = t_.size() - 1; } } else { ubound_index_ = 0; e_->send(t_[ubound_index_], 
net_cvode_instance, nt); } } void VecPlayContinuous::deliver(double tt, NetCvode* ns) { NrnThread* nt = nrn_threads + ith_; // printf("deliver %g\n", tt); last_index_ = ubound_index_; // clang-format off nrn_pragma_acc(update device(last_index_) if (nt->compute_gpu)) nrn_pragma_omp(target update to(last_index_) if (nt->compute_gpu)) // clang-format on if (discon_indices_) { if (discon_index_ < discon_indices_->size()) { ubound_index_ = (int) (*discon_indices_)[discon_index_++]; // printf("after deliver:send %d %g\n", ubound_index_, t_->elem(ubound_index_)); e_->send(t_[ubound_index_], ns, nt); } else { ubound_index_ = t_.size() - 1; } } else { if (ubound_index_ < t_.size() - 1) { ubound_index_++; e_->send(t_[ubound_index_], ns, nt); } } // clang-format off nrn_pragma_acc(update device(ubound_index_) if (nt->compute_gpu)) nrn_pragma_omp(target update to(ubound_index_) if (nt->compute_gpu)) // clang-format on continuous(tt); } void VecPlayContinuous::continuous(double tt) { #ifdef CORENEURON_ENABLE_GPU NrnThread* nt = nrn_threads + ith_; #endif // clang-format off nrn_pragma_acc(kernels present(this) if(nt->compute_gpu)) nrn_pragma_omp(target if(nt->compute_gpu)) { *pd_ = interpolate(tt); } // clang-format on } double VecPlayContinuous::interpolate(double tt) { if (tt >= t_[ubound_index_]) { last_index_ = ubound_index_; if (last_index_ == 0) { // printf("return last tt=%g ubound=%g y=%g\n", tt, t_->elem(ubound_index_), // y_->elem(last_index_)); return y_[last_index_]; } } else if (tt <= t_[0]) { last_index_ = 0; // printf("return elem(0) tt=%g t0=%g y=%g\n", tt, t_->elem(0), y_->elem(0)); return y_[0]; } else { search(tt); } double x0 = y_[last_index_ - 1]; double x1 = y_[last_index_]; double t0 = t_[last_index_ - 1]; double t1 = t_[last_index_]; // printf("IvocVectRecorder::continuous tt=%g t0=%g t1=%g theta=%g x0=%g x1=%g\n", tt, t0, t1, // (tt - t0)/(t1 - t0), x0, x1); if (t0 == t1) { return (x0 + x1) / 2.; } return interp((tt - t0) / (t1 - t0), x0, x1); } void 
VecPlayContinuous::search(double tt) { // assert (tt > t_->elem(0) && tt < t_->elem(t_->size() - 1)) while (tt < t_[last_index_]) { --last_index_; } while (tt >= t_[last_index_]) { ++last_index_; } } void VecPlayContinuous::pr() { printf("VecPlayContinuous "); // printf("%s.x[%d]\n", hoc_object_name(y_->obj_), last_index_); } } // namespace coreneuron ================================================ FILE: docs/Doxyfile.in ================================================ # Doxyfile 1.8.15 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. 
PROJECT_NAME = "CoreNEURON" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give the viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. #PROJECT_LOGO = @PROJECT_SOURCE_DIR@/docs/logo.png # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = @CMAKE_CURRENT_BINARY_DIR@/docs # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise cause # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. 
ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all generated output in the proper direction. # Possible values are: None, LTR, RTL and Context. # The default value is: None. OUTPUT_TEXT_DIRECTION = None # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. 
Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. 
STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = YES # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. 
MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. # When you need a literal { or } or , in the value part of an alias you have to # escape them by means of a backslash (\), this can lead to conflicts with the # commands \{ and \} for these it is advised to use the version @{ and @} or use # a double escape (\\{ and \\}) ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. 
For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, # Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files), VHDL, tcl. 
For instance to make doxygen treat # .inc files as Fortran files (default is PHP), and .f files as C (default is # Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = .yaml=Python # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 0. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 0 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). 
This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). 
Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. 
If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = YES # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = YES # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. 
If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespaces # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. 
Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. 
INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. 
SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. 
The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. 
You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = @PROJECT_SOURCE_DIR@/docs/DoxygenLayout.xml # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = YES # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. 
WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. If # EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. 
You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = @PROJECT_SOURCE_DIR@/coreneuron INPUT += @PROJECT_SOURCE_DIR@/tests # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: https://www.gnu.org/software/libiconv/) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, # *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.ipp \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.markdown \ *.md \ *.mm \ *.dox \ *.yaml \ # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. 
This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. 
EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. 
FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. INPUT += ../README.md USE_MDFILE_AS_MAINPAGE = ../README.md #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = NO # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. 
REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_OUTPUT = doxygen # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = @PROJECT_SOURCE_DIR@/docs/footer.html # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. 
If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. # HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. 
For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 344 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via Javascript. If disabled, the navigation index will # consists of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have Javascript, # like the Qt help browser. 
# The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: https://developer.apple.com/xcode/), introduced with OSX # 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. 
A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. 
The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- # filters). 
# This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. 
Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. 
TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # https://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want to formulas look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. 
# Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from https://www.mathjax.org before deployment. # The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. 
Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use the access key + S # (what the access key is depends on the OS and browser, but it is typically # CTRL, ALT/option, or both).

================================================ FILE: docs/index.rst ================================================ Welcome to CoreNEURON's documentation! ====================================== .. toctree:: :maxdepth: 2 :caption: User documentation: userdoc/BinaryFormat/BinaryFormat.md userdoc/MemoryManagement/bbcorepointer.md .. toctree:: :maxdepth: 2 :caption: Developer documentation: doxygen Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/userdoc/BinaryFormat/BinaryFormat.md ================================================ ## CoreNEURON Input Binary File Format NEURON is used for building an in-memory model of the network. The in-memory representation of the model is then dumped to binary files and read by CoreNEURON. The abstract structure of these binary files is shown below: ![Binary File Format](binary_file_format.jpg). > Note: additional datasets are being added for additional functionality (e.g. Gap Junctions). This documentation / format will be updated in the future. ================================================ FILE: docs/userdoc/MemoryManagement/bbcorepointer.md ================================================ ## Transferring dynamically allocated data between NEURON and CoreNEURON User-allocated data can be managed in NMODL using the `POINTER` type. It allows the programmer to reference data that has been allocated in HOC or in VERBATIM blocks. This allows for more advanced data-structures that are not natively supported in NMODL. Since NEURON itself has no knowledge of the layout and size of this data, it cannot transfer `POINTER` data automatically to CoreNEURON. Furthermore, in many cases there is no need to transfer the data between the two instances. In some cases, however, the programmer would like to transfer certain user-defined data into CoreNEURON. The most prominent example is the random123 RNG stream parameters used in synapse mechanisms.
To support this use-case the `BBCOREPOINTER` type was introduced. Variables that are declared as `BBCOREPOINTER` behave exactly the same as `POINTER` but are additionally taken into account when NEURON is serializing mechanism data (for file writing or direct-memory transfer). For NEURON to be able to write (and indeed CoreNEURON to be able to read) `BBCOREPOINTER` data, the programmer has to additionally provide two C functions that are called as part of the serialization/deserialization. ``` static void bbcore_write(double* x, int* d, int* x_offset, int* d_offset, _threadargsproto_); static void bbcore_read(double* x, int* d, int* x_offset, int* d_offset, _threadargsproto_); ``` The implementation of `bbcore_write` and `bbcore_read` determines the serialization and deserialization of the per-instance mechanism data referenced through the various `BBCOREPOINTER`s. NEURON will call `bbcore_write` twice per mechanism instance. In the first sweep, the call is used to determine the required memory to be allocated on the serialization arrays. In the second sweep the call is used to fill in the data per mechanism instance. The functions take the following arguments: * `x`: A `double` type array that will be allocated by NEURON to fill with real-valued data. In the first call, `x` is NULL as it has not been allocated yet. * `d`: An `int` type array that will be allocated by NEURON to fill with integer-valued data. In the first call, `d` is NULL as it has not been allocated yet. * `x_offset`: The offset in `x` at which the mechanism instance should write its real-valued `BBCOREPOINTER` data. In the first call this is an output argument that is expected to be updated by the per-instance size to be allocated. * `d_offset`: The offset in `d` at which the mechanism instance should write its integer-valued `BBCOREPOINTER` data. In the first call this is an output argument that is expected to be updated by the per-instance size to be allocated.
* `_threadargsproto_`: a macro placeholder for NEURON/CoreNEURON data-structure parameters. They are typically only used through generated defines and not by the programmer. The macro is defined as follows: ``` #define _threadargsproto_ \ int _iml, int _cntml_padded, double *_p, Datum *_ppvar, ThreadDatum *_thread, NrnThread *_nt, \ double _v ``` Putting all of this together, the following is a minimal MOD using BBCOREPOINTER: ``` TITLE A BBCOREPOINTER Example NEURON { BBCOREPOINTER my_data } ASSIGNED { my_data } : Do something interesting with my_data ... VERBATIM static void bbcore_write(double* x, int* d, int* x_offset, int* d_offset, _threadargsproto_) { if (x) { double* x_i = x + *x_offset; x_i[0] = _p_my_data[0]; x_i[1] = _p_my_data[1]; } *x_offset += 2; // reserve 2 doubles on serialization buffer x } static void bbcore_read(double* x, int* d, int* x_offset, int* d_offset, _threadargsproto_) { assert(!_p_my_data); double* x_i = x + *x_offset; // my_data needs to be allocated somehow _p_my_data = (double*)malloc(sizeof(double)*2); _p_my_data[0] = x_i[0]; _p_my_data[1] = x_i[1]; *x_offset += 2; } ENDVERBATIM ``` ================================================ FILE: extra/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= # ============================================================================= # Copy first into build directory as it will be used for special-core # ============================================================================= configure_file(nrnivmodl_core_makefile.in ${CMAKE_BINARY_DIR}/share/coreneuron/nrnivmodl_core_makefile @ONLY) configure_file(nrnivmodl-core.in ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core @ONLY) # nrnivmodl-core depends on the building of NMODL_TARGET_TO_DEPEND and the configuration of the # nrnivmodl-core and nrnivmodl_core_makefile this doesn't imply that whenever there is a change in # one of those files then the prebuilt mod files are going to be rebuilt add_custom_target( nrnivmodl-core ALL DEPENDS ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core ${CMAKE_BINARY_DIR}/share/coreneuron/nrnivmodl_core_makefile ${NMODL_TARGET_TO_DEPEND}) # ============================================================================= # Install for end users # ============================================================================= install(FILES ${CMAKE_BINARY_DIR}/share/coreneuron/nrnivmodl_core_makefile DESTINATION share/coreneuron) install(PROGRAMS ${CMAKE_BINARY_DIR}/bin/nrnivmodl-core DESTINATION bin) ================================================ FILE: extra/instrumentation.tau ================================================ BEGIN_INCLUDE_LIST double nrnmpi_dbl_allreduce(double, int) int coreneuron::main(int, char **, char **) int coreneuron::nrnmpi_bgp_conserve(int, int) int coreneuron::nrnmpi_bgp_single_advance(NRNMPI_Spike *) int coreneuron::nrnmpi_spike_exchange(int*, NRNMPI_Spike*) int main(int, char **, char **) size_t nrnbbcore_write() void coreneuron::*nrn_fixed_step_group_thread(coreneuron::NrnThread *) void coreneuron::*nrn_fixed_step_lastpart(coreneuron::NrnThread *) void coreneuron::*nrn_fixed_step_thread(coreneuron::NrnThread *) void 
coreneuron::*nrn_ms_bksub(coreneuron::NrnThread *) void coreneuron::*nrn_ms_bksub_through_triang(coreneuron::NrnThread *) void coreneuron::*nrn_ms_reduce_solve(coreneuron::NrnThread *) void coreneuron::*nrn_ms_treeset_through_triang(coreneuron::NrnThread *) void coreneuron::*setup_tree_matrix(coreneuron::NrnThread *) void coreneuron::*setup_tree_matrix_minimal(coreneuron::NrnThread *) void coreneuron::BBS::netpar_solve(double) void coreneuron::BBS_netpar_solve(double) void coreneuron::NetParEvent::deliver(double, NetCvode *, coreneuron::NrnThread *) void coreneuron::NetParEvent::send(double, NetCvode *, coreneuron::NrnThread *) void coreneuron::_nrn_cur#(coreneuron::NrnThread *, coreneuron::Memb_list *, int) void coreneuron::_nrn_jacob#(coreneuron::NrnThread *, coreneuron::Memb_list *, int) void coreneuron::_nrn_state#(coreneuron::NrnThread *, coreneuron::Memb_list *, int) void coreneuron::all_wait_for_spike_exchange() void coreneuron::bksub(coreneuron::NrnThread *) void coreneuron::deliver_net_events(coreneuron::NrnThread *) void coreneuron::determine_inputpresyn() void coreneuron::finitialize(void) void coreneuron::ncs2nrn_integrate(double) void coreneuron::nonvint(coreneuron::NrnThread *) void coreneuron::nrn2ncs_outputevent(int, double) void coreneuron::nrn_cap_jacob(coreneuron::NrnThread *, Memb_list *) void coreneuron::nrn_cleanup_presyn(PreSyn *) void coreneuron::nrn_deliver_events(coreneuron::NrnThread *) void coreneuron::nrn_finitialize(int, double) void coreneuron::nrn_fixed_step_group(int) void coreneuron::nrn_fixed_step_group_minimal(int) void coreneuron::nrn_fixed_single_steps_minimal(int, double) void coreneuron::nrn_flush_reports(double) void coreneuron::nrn_lhs(coreneuron::NrnThread *) void coreneuron::nrn_multithread_job(void *(*)(coreneuron::NrnThread *)) void coreneuron::nrn_promote() void coreneuron::nrn_rhs(coreneuron::NrnThread *) void coreneuron::nrn_setup(const char *, const char *, int, int) void coreneuron::nrn_solve(coreneuron::NrnThread 
*) void coreneuron::nrn_solve_minimal(coreneuron::NrnThread *) void coreneuron::nrn_spike_exchange(coreneuron::NrnThread *) void coreneuron::nrn_spike_exchange_init() void coreneuron::nrnmpi_barrier() void coreneuron::nrnmpi_bgp_multisend(NRNMPI_Spike *, int, int *) void coreneuron::nrnmpi_int_gather(int *, int *, int, int) void coreneuron::nrnmpi_int_gatherv(int *, int, int *, int *, int *, int) void coreneuron::nrnmpi_postrecv_doubles(double *, int, int, int, void **) void coreneuron::nrnmpi_send_doubles(double *, int, int, int) void coreneuron::nrnmpi_spike_initialize() void coreneuron::nrnmpi_wait(void **) void coreneuron::output_spikes(const char *) void coreneuron::output_spikes_parallel(const char *) void coreneuron::read_phase1(data_reader &, coreneuron::NrnThread &) void coreneuron::read_phase2(data_reader &, coreneuron::NrnThread &) void coreneuron::setup_report_engine(double, double) void coreneuron::solve_interleaved1(int) void coreneuron::triang(coreneuron::NrnThread *) void coreneuron::triang_interleaved(coreneuron::NrnThread *, int, int, int, int *, int *) void coreneuron::update(coreneuron::NrnThread *) void coreneuron::write_checkpoint(coreneuron::NrnThread *, int, const char *, bool) void coreneuron::write_checkpoint(coreneuron::NrnThread *, int, const char*, bool) void coreneuron::write_nrnthread(const char *, coreneuron::NrnThread &, nrncore_CellGroup &) void coreneuron::write_nrnthread_task(const char *, nrncore_CellGroup *) END_INCLUDE_LIST ================================================ FILE: extra/nrnivmodl-core.in ================================================ #!/bin/bash # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= set -e # TODO : mod2c_core can be linked with (HPE-)MPI library # and running that under a slurm allocation results in a # runtime error. For now, unset PMI_RANK variable # which is sufficient to avoid the issue with HPE-MPI+SLURM. unset PMI_RANK # name of the script APP_NAME="$(basename "$0")" # directory and parent directory of this script PARENT_DIR="$(dirname "$BASH_SOURCE")/.." # prefer perl exe set by neuron wrappers in case of wheel PERL_EXE="${CORENRN_PERLEXE:-@PERL_EXECUTABLE@}" # in case of mac installer, wrapper is not used and hence # check if the binary exists. otherwise, just rely on perl being # in default $PATH if [ ! -f "${PERL_EXE}" ]; then PERL_EXE="$(which perl)"; fi ROOT_DIR="$("${PERL_EXE}" -e "use Cwd 'abs_path'; print abs_path('$PARENT_DIR')")" # default arguments : number of parallel builds and default mod file path PARALLEL_BUILDS=4 params_MODS_PATH="." params_BUILD_TYPE="@COMPILE_LIBRARY_TYPE@" params_NRN_PRCELLSTATE="@CORENRN_NRN_PRCELLSTATE@" # prefix for common options : make sure to rename these if options are changed. 
MAKE_OPTIONS="MECHLIB_SUFFIX MOD2CPP_BINARY MOD2CPP_RUNTIME_FLAGS DESTDIR INCFLAGS LINKFLAGS MODS_PATH VERBOSE BUILD_TYPE NRN_PRCELLSTATE" # parse CLI args while getopts "n:m:a:d:i:l:Vp:r:b:h" OPT; do case "$OPT" in n) # suffix for mechanism library params_MECHLIB_SUFFIX="$OPTARG";; m) # nmodl or mod2c binary to use params_MOD2CPP_BINARY="$OPTARG";; a) # additional nmodl flags to be used params_MOD2CPP_RUNTIME_FLAGS="$OPTARG";; d) # destination install directory params_DESTDIR="$OPTARG";; i) # extra include flags params_INCFLAGS="$OPTARG";; l) # extra link flags params_LINKFLAGS="$OPTARG";; V) # make with verbose params_VERBOSE=1;; p) # option for parallel build (with -j) PARALLEL_BUILDS="$OPTARG";; b) # libcorenrnmech library type params_BUILD_TYPE="$OPTARG";; r) # enable NRN_PRCELLSTATE mechanism params_NRN_PRCELLSTATE="$OPTARG";; h) echo "$APP_NAME [options, ...] [mods_path]" echo "Options:" echo " -n The model name, used as a suffix in the shared library" echo " -m NMODL/mod2c code generation compiler path" echo " -a Runtime flags for NMODL/mod2c" echo " -i Definitions passed to the compiler, typically '-I dir..'" echo " -l Definitions passed to the linker, typically '-Lx -lylib..'" echo " -d Install to dest_dir. Default: Off." echo " -r <0|1> Enable NRN_PRCELLSTATE mechanism. Default: @CORENRN_NRN_PRCELLSTATE@." echo " -V Verbose: show commands executed by make" echo " -p Number of parallel builds (Default: $PARALLEL_BUILDS)" echo " -b libcorenrnmech library type" exit 0;; ?) exit 1;; esac done # drop the parsed options so only positional arguments remain shift $(($OPTIND - 1)) # only one mod files directory is supported in neuron and coreneuron if [ $# -gt 1 ]; then echo "[ERROR] $APP_NAME expects at most one mod dir. 
See syntax: '$APP_NAME -h' " exit 1 fi # if given, the mods dir is in $1 if [ $# -eq 1 ]; then params_MODS_PATH="$1" fi shopt -s nullglob # warn if no mod files provided (with nullglob an unmatched pattern leaves the array empty) if [ -d "$params_MODS_PATH" ]; then files=( "$params_MODS_PATH"/*.mod ) if [ ${#files[@]} -eq 0 ]; then echo "WARNING: No mod files found in '$(realpath "${params_MODS_PATH}")', compiling default ones only!" fi else echo "FATAL: Invalid mods directory: '$params_MODS_PATH'" exit 1 fi # temporary directory where mod files will be copied temp_mod_dir="@CMAKE_HOST_SYSTEM_PROCESSOR@/corenrn/mod2c" mkdir -p "$temp_mod_dir" # copy mod files with include files. note that ${ROOT_DIR}/share # has inbuilt mod files and user provided mod files are in $params_MODS_PATH. set +e for mod_dir in "${ROOT_DIR}/share/modfile" "$params_MODS_PATH" ; do # copy mod files and include files files=( "$mod_dir/"*.mod "$mod_dir/"*.inc "$mod_dir/"*.h* ) for f in "${files[@]}"; do # copy mod files only if it's changed (to avoid rebuild) target_file_path="$temp_mod_dir/$(basename "$f")" if ! 
diff -q "$f" "$target_file_path" &>/dev/null; then cp "$f" "$target_file_path" fi done done set -e # use new mod files directory for compilation params_MODS_PATH="$temp_mod_dir" # build params to make command make_params=("ROOT=${ROOT_DIR}") for param in $MAKE_OPTIONS; do var="params_${param}" if [ "${!var+x}" ]; then make_params+=("$param=${!var}") fi done # if -d (deploy) provided, call "make install" if [ "$params_DESTDIR" ]; then make_params+=("install") fi if [ "$params_VERBOSE" ]; then make_params+=("VERBOSE=1") fi # run makefile echo "[INFO] Running: make -j$PARALLEL_BUILDS -f ${ROOT_DIR}/share/coreneuron/nrnivmodl_core_makefile ${make_params[@]}" make -j$PARALLEL_BUILDS -f "${ROOT_DIR}/share/coreneuron/nrnivmodl_core_makefile" "${make_params[@]}" echo "[INFO] MOD files built successfully for CoreNEURON" ================================================ FILE: extra/nrnivmodl_core_makefile.in ================================================ # This Makefile has the rules necessary for making the custom version of # CoreNEURON executable called "special-core" from the provided mod files. # Mod files are looked up in the MODS_PATH directory. # Current system OS OS_NAME := $(shell uname) # ","" is an argument separator, never as a literal for Makefile rule COMMA_OP =, # Default variables for various targets MECHLIB_SUFFIX = MODS_PATH = . 
OUTPUT_DIR = @CMAKE_HOST_SYSTEM_PROCESSOR@ DESTDIR = TARGET_LIB_TYPE = $(BUILD_TYPE) # required for OSX to execute nrnivmodl-core ifeq ($(origin SDKROOT), undefined) export SDKROOT := $(shell xcrun --sdk macosx --show-sdk-path) endif # CoreNEURON installation directories CORENRN_BIN_DIR := $(ROOT)/bin CORENRN_LIB_DIR := $(ROOT)/lib CORENRN_INC_DIR := $(ROOT)/include CORENRN_SHARE_CORENRN_DIR:= $(ROOT)/share/coreneuron CORENRN_SHARE_MOD2CPP_DIR := $(ROOT)/share/mod2c # name of the CoreNEURON binary SPECIAL_EXE = $(OUTPUT_DIR)/special-core # Directory where cpp files are generated for each mod file MOD_TO_CPP_DIR = $(OUTPUT_DIR)/corenrn/mod2c # Directory where cpp files are compiled MOD_OBJS_DIR = $(OUTPUT_DIR)/corenrn/build # Linked libraries gathered by CMake LDFLAGS = $(LINKFLAGS) @CORENRN_COMMON_LDFLAGS@ # Includes paths gathered by CMake # coreneuron/utils/randoms goes first because it needs to override the NEURON # directory in INCFLAGS INCLUDES = -I$(CORENRN_INC_DIR)/coreneuron/utils/randoms $(INCFLAGS) -I$(CORENRN_INC_DIR) ifeq (@CORENRN_ENABLE_MPI_DYNAMIC@, OFF) INCLUDES += $(if @MPI_CXX_INCLUDE_PATH@, -I$(subst ;, -I,@MPI_CXX_INCLUDE_PATH@),) endif INCLUDES += $(if @reportinglib_INCLUDE_DIR@, -I$(subst ;, -I,@reportinglib_INCLUDE_DIR@),) # CXX is always defined. If the definition comes from default change it ifeq ($(origin CXX), default) CXX = @CMAKE_CXX_COMPILER@ endif ifeq (@CORENRN_ENABLE_GPU@, ON) ifneq ($(shell $(CXX) --version | grep -o nvc++), nvc++) $(error GPU wheels are only compatible with the NVIDIA C++ compiler nvc++, but CXX=$(CXX) and --version gives $(shell $(CXX) --version)) endif # nvc++ -dumpversion is simpler, but only available from 22.2 ifeq ($(findstring nvc++ @CORENRN_NVHPC_MAJOR_MINOR_VERSION@, $(shell $(CXX) --version)),) $(error GPU wheels are currently not compatible across NVIDIA HPC SDK versions. You have $(shell $(CXX) -V | grep nvc++) but this wheel was built with @CORENRN_NVHPC_MAJOR_MINOR_VERSION@.) 
endif endif # In case of wheel, python and perl exe paths are from the build machine. # First prefer env variables set by neuron's nrnivmodl wrapper then check # binary used during build. If they don't exist then simply use python and # perl as the name of binaries. CORENRN_PYTHONEXE ?= @PYTHON_EXECUTABLE@ CORENRN_PERLEXE ?= @PERL_EXECUTABLE@ ifeq ($(wildcard $(CORENRN_PYTHONEXE)),) CORENRN_PYTHONEXE=python endif ifeq ($(wildcard $(CORENRN_PERLEXE)),) CORENRN_PERLEXE=perl endif CXXFLAGS = @CORENRN_CXX_FLAGS@ CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ $(INCLUDES) CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@ CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@ # env variables required for mod2c or nmodl MOD2CPP_ENV_VAR = @CORENRN_SANITIZER_ENABLE_ENVIRONMENT_STRING@ PYTHONPATH=@CORENRN_NMODL_PYTHONPATH@:${CORENRN_LIB_DIR}/python MODLUNIT=$(CORENRN_SHARE_MOD2CPP_DIR)/nrnunits.lib # nmodl options ifeq (@CORENRN_ENABLE_NMODL@, ON) ifeq (@CORENRN_ENABLE_GPU@, ON) nmodl_arguments_c=@NMODL_ACC_BACKEND_ARGS@ @NMODL_COMMON_ARGS@ else nmodl_arguments_c=@NMODL_CPU_BACKEND_ARGS@ @NMODL_COMMON_ARGS@ endif endif # name of the mechanism library with suffix if provided COREMECH_LIB_NAME = corenrnmech$(if $(MECHLIB_SUFFIX),_$(MECHLIB_SUFFIX),) COREMECH_LIB_PATH = $(OUTPUT_DIR)/lib$(COREMECH_LIB_NAME)$(LIB_SUFFIX) # Various header and C++/Object file MOD_FUNC_CPP = $(MOD_TO_CPP_DIR)/_mod_func.cpp MOD_FUNC_OBJ = $(MOD_OBJS_DIR)/_mod_func.o ENGINEMECH_OBJ = $(MOD_OBJS_DIR)/enginemech.o # Depending on static/shared build, determine library name and it's suffix ifeq ($(TARGET_LIB_TYPE), STATIC) LIB_SUFFIX = @CMAKE_STATIC_LIBRARY_SUFFIX@ corenrnmech_lib_target = coremech_lib_static else LIB_SUFFIX = @CMAKE_SHARED_LIBRARY_SUFFIX@ corenrnmech_lib_target = coremech_lib_shared endif # Binary of MOD2C/NMODL depending on CMake option activated ifeq (@nmodl_FOUND@, 
TRUE) MOD2CPP_BINARY_PATH = $(if $(MOD2CPP_BINARY),$(MOD2CPP_BINARY), @CORENRN_MOD2CPP_BINARY@) INCLUDES += -I@CORENRN_MOD2CPP_INCLUDE@ else MOD2CPP_BINARY_PATH = $(if $(MOD2CPP_BINARY),$(MOD2CPP_BINARY), $(CORENRN_BIN_DIR)/@nmodl_binary_name@) endif # MOD files with full path, without path and names without .mod extension mod_files_paths = $(sort $(wildcard $(MODS_PATH)/*.mod)) mod_files_names = $(sort $(notdir $(wildcard $(MODS_PATH)/*.mod))) mod_files_no_ext = $(mod_files_names:.mod=) mod_files_for_cpp_backend = $(foreach mod_file, $(mod_files_paths), $(addprefix $(MOD_TO_CPP_DIR)/, $(notdir $(mod_file)))) # CPP files and their objects mod_cpp_files = $(patsubst %.mod,%.cpp,$(mod_files_for_cpp_backend)) mod_cpp_objs = $(addprefix $(MOD_OBJS_DIR)/,$(addsuffix .o,$(basename $(mod_files_no_ext)))) # We use $ORIGIN (@loader_path in OSX) ORIGIN_RPATH := $(if $(filter Darwin,$(OS_NAME)),@loader_path,$$ORIGIN) SONAME_OPTION := -Wl,$(if $(filter Darwin,$(OS_NAME)),-install_name${COMMA_OP}@rpath/,-soname${COMMA_OP})$(notdir ${COREMECH_LIB_PATH}) LIB_RPATH = $(if $(DESTDIR),$(DESTDIR)/lib,$(ORIGIN_RPATH)) # When special-core is installed, it needs to find the library in the # lib folder of the install prefix. We use a relative path so that it # stays portable when files are moved (e.g. python wheel) INSTALL_LIB_RPATH = $(ORIGIN_RPATH)/../lib # All objects used during build ALL_OBJS = $(MOD_FUNC_OBJ) $(mod_cpp_objs) # Colors for pretty printing C_RESET := \033[0m C_GREEN := \033[32m # Default nmodl flags. 
Override if MOD2CPP_RUNTIME_FLAGS is not empty ifeq (@CORENRN_ENABLE_NMODL@, ON) MOD2CPP_FLAGS_C = $(if $(MOD2CPP_RUNTIME_FLAGS),$(MOD2CPP_RUNTIME_FLAGS),$(nmodl_arguments_c)) endif $(info Default NMODL flags: @nmodl_arguments_c@) ifneq ($(MOD2CPP_RUNTIME_FLAGS),) $(warning Runtime nmodl flags (they replace the default ones): $(MOD2CPP_RUNTIME_FLAGS)) endif # ======== MAIN BUILD RULES ============ # main target to build binary $(SPECIAL_EXE): $(corenrnmech_lib_target) @printf " => $(C_GREEN)Binary$(C_RESET) creating $(SPECIAL_EXE)\n" $(CXX_LINK_EXE_CMD) -o $(SPECIAL_EXE) $(CORENRN_SHARE_CORENRN_DIR)/coreneuron.cpp \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ -L$(OUTPUT_DIR) -l$(COREMECH_LIB_NAME) $(LDFLAGS) \ -L$(CORENRN_LIB_DIR) \ -Wl,-rpath,'$(LIB_RPATH)' -Wl,-rpath,$(CORENRN_LIB_DIR) -Wl,-rpath,'$(INSTALL_LIB_RPATH)' $(ENGINEMECH_OBJ): $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp | $(MOD_OBJS_DIR) $(CXX_COMPILE_CMD) -c -DADDITIONAL_MECHS $(CORENRN_SHARE_CORENRN_DIR)/enginemech.cpp -o $(ENGINEMECH_OBJ) # build shared library of mechanisms coremech_lib_shared: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always # extract the object files from libcoreneuron-core.a mkdir -p $(MOD_OBJS_DIR)/libcoreneuron-core rm -f $(MOD_OBJS_DIR)/libcoreneuron-core/*.o # --output is only supported by modern versions of ar (cd $(MOD_OBJS_DIR)/libcoreneuron-core && ar x $(CORENRN_LIB_DIR)/libcoreneuron-core.a) $(CXX_SHARED_LIB_CMD) $(ENGINEMECH_OBJ) -o ${COREMECH_LIB_PATH} $(ALL_OBJS) \ -I$(CORENRN_INC_DIR) $(INCFLAGS) \ @CORENEURON_LINKER_START_GROUP@ \ $(MOD_OBJS_DIR)/libcoreneuron-core/*.o @CORENEURON_LINKER_END_GROUP@ \ $(LDFLAGS) ${SONAME_OPTION} \ -Wl,-rpath,$(CORENRN_LIB_DIR) -L$(CORENRN_LIB_DIR) # cleanup rm $(MOD_OBJS_DIR)/libcoreneuron-core/*.o # build static library of mechanisms coremech_lib_static: $(ALL_OBJS) $(ENGINEMECH_OBJ) build_always # make a libcorenrnmech.a by copying libcoreneuron-core.a and then appending # the newly compiled objects cp $(CORENRN_LIB_DIR)/libcoreneuron-core.a 
${COREMECH_LIB_PATH} ar r ${COREMECH_LIB_PATH} $(ENGINEMECH_OBJ) $(ALL_OBJS) # compile cpp files to .o $(MOD_OBJS_DIR)/%.o: $(MOD_TO_CPP_DIR)/%.cpp | $(MOD_OBJS_DIR) $(CXX_COMPILE_CMD) -c $< -o $@ -DNRN_PRCELLSTATE=$(NRN_PRCELLSTATE) @CORENEURON_TRANSLATED_CODE_COMPILE_FLAGS@ # translate MOD files to CPP using mod2c/NMODL $(mod_cpp_files): $(MOD_TO_CPP_DIR)/%.cpp: $(MODS_PATH)/%.mod | $(MOD_TO_CPP_DIR) $(MOD2CPP_ENV_VAR) $(MOD2CPP_BINARY_PATH) $< -o $(MOD_TO_CPP_DIR)/ $(MOD2CPP_FLAGS_C) # generate mod registration function. Dont overwrite if it's not changed $(MOD_FUNC_CPP): build_always | $(MOD_TO_CPP_DIR) $(CORENRN_PERLEXE) $(CORENRN_SHARE_CORENRN_DIR)/mod_func.c.pl $(mod_files_names) > $(MOD_FUNC_CPP).tmp diff -q $(MOD_FUNC_CPP).tmp $(MOD_FUNC_CPP) || \ mv $(MOD_FUNC_CPP).tmp $(MOD_FUNC_CPP) # symlink to cpp files provided by coreneuron $(MOD_TO_CPP_DIR)/%.cpp: $(CORENRN_SHARE_MOD2CPP_DIR)/%.cpp | $(MOD_TO_CPP_DIR) ln -s $< $@ # create directories needed $(MOD_TO_CPP_DIR): mkdir -p $(MOD_TO_CPP_DIR) $(MOD_OBJS_DIR): mkdir -p $(MOD_OBJS_DIR) # install binary and libraries install: $(SPECIAL_EXE) install -d $(DESTDIR)/bin $(DESTDIR)/lib install ${COREMECH_LIB_PATH} $(DESTDIR)/lib install $(SPECIAL_EXE) $(DESTDIR)/bin .PHONY: build_always $(VERBOSE).SILENT: # delete cpp files if mod2c error, otherwise they are not generated again .DELETE_ON_ERROR: ================================================ FILE: tests/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2021 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= include(TestHelpers) include_directories(${CORENEURON_PROJECT_SOURCE_DIR} ${CORENEURON_PROJECT_BINARY_DIR}/generated ${Boost_INCLUDE_DIRS}) # Add compiler flags that should apply to all CoreNEURON targets, but which should not leak into # other included projects. add_compile_definitions(${CORENRN_COMPILE_DEFS}) add_compile_options(${CORENRN_EXTRA_CXX_FLAGS}) add_link_options(${CORENRN_EXTRA_LINK_FLAGS}) if(NOT Boost_USE_STATIC_LIBS) add_definitions(-DBOOST_TEST_DYN_LINK=TRUE) endif() set(CMAKE_BUILD_RPATH ${CMAKE_BINARY_DIR}/bin/${CMAKE_HOST_SYSTEM_PROCESSOR}) set(Boost_NO_BOOST_CMAKE TRUE) # Minimum set by needing the multi-argument version of BOOST_AUTO_TEST_CASE. find_package(Boost 1.59 QUIET COMPONENTS filesystem system atomic unit_test_framework) if(Boost_FOUND) if(CORENRN_ENABLE_UNIT_TESTS) add_library(coreneuron-unit-test INTERFACE) target_compile_options(coreneuron-unit-test INTERFACE ${CORENEURON_BOOST_UNIT_TEST_COMPILE_FLAGS}) target_include_directories(coreneuron-unit-test SYSTEM INTERFACE ${Boost_INCLUDE_DIRS}) target_link_libraries(coreneuron-unit-test INTERFACE coreneuron-all) add_subdirectory(unit/cmdline_interface) add_subdirectory(unit/interleave_info) add_subdirectory(unit/alignment) add_subdirectory(unit/queueing) add_subdirectory(unit/solver) # lfp test uses nrnmpi_* wrappers but does not load the dynamic MPI library TODO: re-enable # after NEURON and CoreNEURON dynamic MPI are merged if(NOT CORENRN_ENABLE_MPI_DYNAMIC) add_subdirectory(unit/lfp) endif() endif() message(STATUS "Boost found, unit tests enabled") else() message(STATUS "Boost not found, unit tests disabled") endif() add_subdirectory(integration) ================================================ FILE: tests/integration/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain 
Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= if(CORENRN_ENABLE_MPI_DYNAMIC) # ~~~ # In case of submodule building we don't know the MPI launcher and mpi # distribution being used. So for now just skip these tests and rely on # neuron to test dynamic mpi mode. For coreneuron build assume are just # building single generic mpi library libcorenrn_mpi. # ~~~ if(CORENEURON_AS_SUBPROJECT) message(STATUS "CoreNEURON integration tests are disabled with dynamic MPI") return() else() set(CORENRN_MPI_LIB_ARG "--mpi-lib ${PROJECT_BINARY_DIR}/lib/lib${CORENRN_MPI_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" ) endif() endif() set(COMMON_ARGS "--tstop 100. --celsius 6.3 --mpi ${CORENRN_MPI_LIB_ARG}") set(MODEL_STATS_ARG "--model-stats") set(RING_DATASET_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ring") set(RING_COMMON_ARGS "--datpath ${RING_DATASET_DIR} ${COMMON_ARGS}") set(RING_GAP_COMMON_ARGS "--datpath ${CMAKE_CURRENT_SOURCE_DIR}/ring_gap ${COMMON_ARGS}") set(PERMUTE1_ARGS "--cell-permute 1") set(PERMUTE2_ARGS "--cell-permute 2") set(CUDA_INTERFACE "--cuda-interface") if(CORENRN_ENABLE_GPU) set(GPU_ARGS "--gpu") set(permutation_modes 1 2) else() set(permutation_modes 0 1) endif() # List of tests with arguments set(TEST_CASES_WITH_ARGS "ring!${RING_COMMON_ARGS} ${MODEL_STATS_ARG} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring" "ring_binqueue!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_binqueue --binqueue" "ring_multisend!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_multisend --multisend" "ring_spike_buffer!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_spike_buffer --spikebuf 1" "ring_gap!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap" "ring_gap_binqueue!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_binqueue --binqueue" 
"ring_gap_multisend!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_multisend --multisend" ) set(test_suffixes "" "_binqueue" "_multisend") foreach(cell_permute ${permutation_modes}) list(APPEND test_suffixes "_permute${cell_permute}") list( APPEND TEST_CASES_WITH_ARGS "ring_permute${cell_permute}!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute${cell_permute} --cell-permute=${cell_permute}" "ring_gap_permute${cell_permute}!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute${cell_permute} --cell-permute=${cell_permute}" ) # As reports require MPI, do not add test if report is enabled. if(NOT CORENRN_ENABLE_REPORTING) list(APPEND test_suffixes "_serial_permute${cell_permute}") list( APPEND TEST_CASES_WITH_ARGS "ring_serial_permute${cell_permute}!${GPU_ARGS} --cell-permute=${cell_permute} --tstop 100. --celsius 6.3 --datpath ${RING_DATASET_DIR} ${MODEL_STATS_ARG} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_serial_permute${cell_permute}" ) endif() endforeach() if(CORENRN_ENABLE_GPU) list(APPEND test_suffixes "_permute2_cudaInterface") list( APPEND TEST_CASES_WITH_ARGS "ring_permute2_cudaInterface!${RING_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" "ring_gap_permute2_cudaInterface!${RING_GAP_COMMON_ARGS} ${GPU_ARGS} --outpath ${CMAKE_CURRENT_BINARY_DIR}/ring_gap_permute2_cudaInterface ${PERMUTE2_ARGS} ${CUDA_INTERFACE}" ) endif() # ~~~ # There are no directories for permute and multisend related tests, # create them and copy reference spikes # ~~~ foreach(data_dir "ring" "ring_gap") # Naïve foreach(test_suffix ${test_suffixes}) does not seem to handle empty suffixes correctly. 
list(LENGTH test_suffixes num_suffixes) math(EXPR num_suffixes_m1 "${num_suffixes} - 1") foreach(suffix_index RANGE 0 ${num_suffixes_m1}) list(GET test_suffixes ${suffix_index} test_suffix) file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/${data_dir}/out.dat.ref" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/${data_dir}${test_suffix}/") endforeach() endforeach() # test without ring_gap version file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/ring/out.dat.ref" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/ring_spike_buffer/") # names of all tests added set(CORENRN_TEST_NAMES "") # Configure test scripts foreach(args_line ${TEST_CASES_WITH_ARGS}) string(REPLACE "!" ";" string_line ${args_line}) set(test_num_processors 1) if(MPI_FOUND) # serial test run without srun or mpiexec if(args_line MATCHES "ring_serial.*") string(REPLACE ";" " " SRUN_PREFIX "") else() set(test_num_processors 2) string(REPLACE ";" " " SRUN_PREFIX "${TEST_MPI_EXEC_BIN};-n;${test_num_processors}") endif() endif() list(GET string_line 0 TEST_NAME) list(GET string_line 1 TEST_ARGS) set(SIM_NAME ${TEST_NAME}) configure_file(integration_test.sh.in ${TEST_NAME}/integration_test.sh @ONLY) add_test( NAME ${TEST_NAME}_TEST COMMAND "/bin/sh" ${CMAKE_CURRENT_BINARY_DIR}/${TEST_NAME}/integration_test.sh WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${TEST_NAME}") set_tests_properties(${TEST_NAME}_TEST PROPERTIES PROCESSORS ${test_num_processors}) cpp_cc_configure_sanitizers(TEST ${TEST_NAME}_TEST) list(APPEND CORENRN_TEST_NAMES ${TEST_NAME}_TEST) endforeach() if(CORENRN_ENABLE_REPORTING) foreach(TEST_NAME "1") set(SIM_NAME "reporting_${TEST_NAME}") set(CONFIG_ARG "${TEST_NAME}") configure_file(reportinglib/${TEST_NAME}.conf.in ${SIM_NAME}/${TEST_NAME}.conf @ONLY) configure_file(reportinglib/reporting_test.sh.in ${SIM_NAME}/reporting_test.sh @ONLY) configure_file(reportinglib/${TEST_NAME}.check.in ${SIM_NAME}/${TEST_NAME}.check @ONLY) file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/reportinglib/test_ref.out" DESTINATION "${SIM_NAME}/") add_test( 
NAME ${SIM_NAME} COMMAND "/bin/sh" ${CMAKE_CURRENT_BINARY_DIR}/${SIM_NAME}/reporting_test.sh WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${SIM_NAME}") cpp_cc_configure_sanitizers(TEST ${SIM_NAME}) list(APPEND CORENRN_TEST_NAMES ${SIM_NAME}) endforeach() endif() ================================================ FILE: tests/integration/README.md ================================================ # Generating Tests Input Dataset There are two integration tests under the `tests/integration/` directory. The input dataset is generated using NEURON. You can follow the steps below to generate the test data. Once you have the latest NEURON installed, you have to clone the [ringtest](https://github.com/nrnhines/ringtest) model from GitHub: ```bash git clone https://github.com/nrnhines/ringtest.git ``` You have to create `special` as usual with NEURON: ```bash nrnivmodl mod ``` Now we can generate data for the `ring` test as: ```bash mpirun -n 2 ./x86_64/special ringtest.py -nring 1 -ncell 20 -tstop 100 -mpi -dumpmodel # sort spikes and remove old spike output sortspike spk2.std coredat/out.dat.ref rm spk2.std ``` The generated dataset can be copied to `tests/integration/ring/`: ```bash mv coredat/* /coreneuron/tests/integration/ring/ ``` Similarly, the dataset for the `ring_gap` test can be generated as: ```bash mpirun -n 2 ./x86_64/special ringtest.py -nring 1 -ncell 20 -tstop 100 -gap -mpi -dumpmodel # sort spikes and remove old spike output sortspike spk2.std coredat/out.dat.ref rm spk2.std mv coredat/* /coreneuron/tests/integration/ring_gap/ ``` ================================================ FILE: tests/integration/integration_test.sh.in ================================================ #!/usr/bin/env bash set -e export OMP_NUM_THREADS=1 export LIBSONATA_ZERO_BASED_GIDS=true # Run the executable SRUN_EXTRA= if [ -n "$VALGRIND" -a -n "$VALGRIND_PRELOAD" ]; then echo "Running with valgrind" LD_PRELOAD=$VALGRIND_PRELOAD \ @SRUN_PREFIX@ $SRUN_EXTRA $VALGRIND 
@CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core @TEST_ARGS@ else @SRUN_PREFIX@ $SRUN_EXTRA @CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core @TEST_ARGS@ fi exitvalue=$? # Check for error result if [ $exitvalue -ne 0 ]; then echo "Error status value: $exitvalue" exit $exitvalue fi # diff output files with reference cd @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@ # We convert spikes to out.dat format reports=@ENABLE_SONATA_REPORTS_TESTS@ if [ "$reports" = "ON" ] then data=$(@H5DUMP_EXECUTABLE@ -d /spikes/All/timestamps -d /spikes/All/node_ids -y -O out.h5 | sed 's/"ms"//g;s/,/\n/g') echo $data | awk '{n=NF/2; for (i=1;i<=n;i++) print $i "\t" $(n+i) }' > out_SONATA.dat if [ ! -f out_SONATA.dat ] then echo "[ERROR] No SONATA output files. Test failed!" >&2 exit 1 fi # diff exits non-zero on differences; do not let 'set -e' abort before the error report below diff -w out_SONATA.dat out.dat.ref > diff_SONATA.dat 2>&1 || true if [ -s diff_SONATA.dat ] then echo "[ERROR] SONATA Results are different, check the file diff_SONATA.dat. Test failed!" >&2 exit 1 fi fi if [ ! -f out.dat ] then echo "[ERROR] No output files. Test failed!" >&2 exit 1 fi diff -w out.dat out.dat.ref > diff.dat 2>&1 || true if [ -s diff.dat ] then echo "[ERROR] Results are different, check the file diff.dat. Test failed!" >&2 exit 1 else echo "Results are the same, test passed" rm -f *.dat exit 0 fi ================================================ FILE: tests/integration/reportinglib/1.check.in ================================================ #!/bin/sh OK=0 FAILED=1 sonata_reports=@ENABLE_SONATA_REPORTS_TESTS@ bin_reports=@ENABLE_BIN_REPORTS_TESTS@ test_ref=@CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/test_ref.out if [ "$bin_reports" = "ON" ] then if [ -f test_1.bbp ] then somaDump_diff=$(@reportinglib_somaDump@ test_1.bbp 1 | sed 's/ //g' | diff $test_ref -) if [ $? -ne 0 ] then echo -e "[ERROR] The report output generated by Reportinglib differs!\n$somaDump_diff" >&2 exit $FAILED fi else echo "[ERROR] Expected ReportingLib soma file 'test_1.bbp' is missing. Test failed!" 
>&2 exit $FAILED fi fi if [ "$sonata_reports" = "ON" ] then if [ -f test_2.h5 ] then h5dump_diff=$(@H5DUMP_EXECUTABLE@ -d /report/PopA/data -y -O test_2.h5 | sed '1d;$d;s/,//g;s/ //g' | diff $test_ref -) if [ $? -ne 0 ] then echo -e "[ERROR] The report output generated by Libsonata differs!\n$h5dump_diff" >&2 exit $FAILED fi else echo "[ERROR] Expected SONATA soma file 'test_2.h5' doesn't exist. Test failed!" >&2 exit $FAILED fi if [ ! -f spikes.h5 ] then echo "[ERROR] Expected SONATA spike file 'spikes.h5' doesn't exist. Test failed!" >&2 exit $FAILED fi fi # If we reach this point, all tests were successful exit $OK ================================================ FILE: tests/integration/reportinglib/1.conf.in ================================================ outpath = ./ datpath = @CMAKE_CURRENT_SOURCE_DIR@/ring/ tstop = 10.000000 dt = 0.025000 forwardskip = 0.000000 prcellgid = -1 report-conf = @CMAKE_CURRENT_SOURCE_DIR@/reportinglib/1.report cell-permute = 0 ================================================ FILE: tests/integration/reportinglib/reporting_test.sh.in ================================================ #! 
/bin/sh set -e -o pipefail export OMP_NUM_THREADS=1 export LIBSONATA_ZERO_BASED_GIDS=true @SRUN_PREFIX@ @CMAKE_BINARY_DIR@/bin/@CMAKE_SYSTEM_PROCESSOR@/special-core --mpi --read-config @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.conf chmod +x @CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.check exit `@CMAKE_CURRENT_BINARY_DIR@/@SIM_NAME@/@TEST_NAME@.check` ================================================ FILE: tests/integration/reportinglib/test_ref.out ================================================ -65 -64.9973 -64.9951 -64.9932 -64.9916 -64.9902 -64.9889 -64.9877 -64.9867 -64.9858 -64.985 -64.9842 -64.9836 -64.9829 -64.9824 -64.9819 -64.9815 -64.9811 -64.9807 -64.9804 -64.9802 -64.9799 -64.9797 -64.9796 -64.9794 -64.9793 -64.9792 -64.9791 -64.979 -64.979 -64.979 -64.979 -64.979 -64.979 -64.979 -64.9791 -64.9791 -64.7371 -63.6264 -62.1068 -60.4682 -58.847 -57.2905 -55.7913 -54.3056 -52.7594 -51.044 -48.9961 -46.3491 -42.6233 -36.8741 -27.1665 -10.1852 13.977 31.4561 36.143 35.2487 32.4239 28.6338 24.2472 19.4933 14.5405 9.51339 4.50006 -0.440951 -5.27461 -9.98373 -14.5648 -19.0258 -23.3868 -27.6838 -31.9759 -36.353 -40.9401 -45.8855 -51.303 -57.1176 -62.8313 -67.5469 -70.6416 -72.2969 -73.0829 -73.4434 -73.6102 -73.6866 -73.7171 -73.7212 -73.7082 -73.6828 -73.6479 -73.6053 -73.5561 -73.5012 -73.4414 -73.3771 -73.3089 -73.237 -73.1618 -73.0836 -73.0025 ================================================ FILE: tests/integration/ring/out.dat.ref ================================================ 2.65 0 5.3 1 7.95 2 10.6 3 13.25 4 15.9 5 18.55 6 21.2 7 23.85 8 26.5 9 29.15 10 31.8 11 34.45 12 37.1 13 39.75 14 42.4 15 45.05 16 47.7 17 50.35 18 53 19 55.65 0 58.3 1 60.95 2 63.6 3 66.25 4 68.9 5 71.55 6 74.2 7 76.85 8 79.5 9 82.15 10 84.8 11 87.45 12 90.1 13 92.75 14 95.4 15 98.05 16 ================================================ FILE: tests/integration/ring_gap/mod files/halfgap.mod ================================================ : ggap.mod : This is a 
conductance based gap junction to allow setting g = 0 NEURON { POINT_PROCESS HalfGap RANGE g, i, vgap ELECTRODE_CURRENT i } PARAMETER { g = 0 (1/megohm) } ASSIGNED { v (millivolt) vgap (millivolt) i (nanoamp) } BREAKPOINT { i = (vgap - v)*g } ================================================ FILE: tests/integration/ring_gap/out.dat.ref ================================================ 3.275 19 4.325 0 4.425 18 5.5 1 5.575 17 6.65 2 6.75 16 7.825 3 7.9 15 8.975 4 9.05 14 10.15 5 10.225 13 11.325 6 11.4 12 12.475 7 12.55 11 13.625 8 13.7 10 14.25 9 ================================================ FILE: tests/unit/alignment/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= add_executable(alignment_test_bin alignment.cpp) target_link_libraries(alignment_test_bin coreneuron-unit-test) add_test(NAME alignment_test COMMAND $<TARGET_FILE:alignment_test_bin>) cpp_cc_configure_sanitizers(TARGET alignment_test_bin TEST alignment_test) ================================================ FILE: tests/unit/alignment/alignment.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/

// NOTE(review): in this copy the operands of the bare `#include` directives and
// several template parameter/argument lists are missing; the code tokens below
// are kept exactly as found.
#include "coreneuron/utils/memory.h"
#include
#define BOOST_TEST_MODULE PaddingCheck
#include
#include
#include

/// Entry of the type lists below: exposes an element type as `value_type` and
/// a chunk width as `chunk`.
template struct data {
    typedef T value_type;
    static const int chunk = n;
};

/// Type list that drives `memory_alignment_simd_false`.
typedef boost::mpl::list, data> chunk_default_data_type;

/// Type list that drives `memory_alignment_simd_true`.
typedef boost::mpl::list, data, data, data, data, data, data, data, data, data> chunk_data_type;

/// Checks `coreneuron::soa_padded_size` for several chunk widths.
/// In every SOA case the expected value is the element count rounded up to the
/// next multiple of the template argument.
BOOST_AUTO_TEST_CASE(padding_simd) {
    /** AOS test */
    int pad = coreneuron::soa_padded_size<1>(11, 1);
    BOOST_CHECK_EQUAL(pad, 11);

    /** SOA tests with 11 */
    pad = coreneuron::soa_padded_size<1>(11, 0);
    BOOST_CHECK_EQUAL(pad, 11);

    pad = coreneuron::soa_padded_size<2>(11, 0);
    BOOST_CHECK_EQUAL(pad, 12);

    pad = coreneuron::soa_padded_size<4>(11, 0);
    BOOST_CHECK_EQUAL(pad, 12);

    pad = coreneuron::soa_padded_size<8>(11, 0);
    BOOST_CHECK_EQUAL(pad, 16);

    pad = coreneuron::soa_padded_size<16>(11, 0);
    BOOST_CHECK_EQUAL(pad, 16);

    pad = coreneuron::soa_padded_size<32>(11, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    /** SOA tests with 32 */
    // 32 is already a multiple of every tested width, so no padding is expected.
    pad = coreneuron::soa_padded_size<1>(32, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    pad = coreneuron::soa_padded_size<2>(32, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    pad = coreneuron::soa_padded_size<4>(32, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    pad = coreneuron::soa_padded_size<8>(32, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    pad = coreneuron::soa_padded_size<16>(32, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    pad = coreneuron::soa_padded_size<32>(32, 0);
    BOOST_CHECK_EQUAL(pad, 32);

    /** SOA tests with 33 */
    // One element past a multiple of 32: every width > 1 has to pad.
    pad = coreneuron::soa_padded_size<1>(33, 0);
    BOOST_CHECK_EQUAL(pad, 33);

    pad = coreneuron::soa_padded_size<2>(33, 0);
    BOOST_CHECK_EQUAL(pad, 34);

    pad = coreneuron::soa_padded_size<4>(33, 0);
    BOOST_CHECK_EQUAL(pad, 36);

    pad = coreneuron::soa_padded_size<8>(33, 0);
    BOOST_CHECK_EQUAL(pad, 40);

    pad = coreneuron::soa_padded_size<16>(33, 0);
    BOOST_CHECK_EQUAL(pad, 48);

    pad = coreneuron::soa_padded_size<32>(33, 0);
    BOOST_CHECK_EQUAL(pad, 64);
}

/// Six chunks are laid out back to back in one buffer allocated with 16-byte
/// alignment. Chunks starting at odd multiples of the padded size are expected
/// to be misaligned, those at even multiples aligned.
/// Which chunks end up aligned depends on the element TYPE and on the number of
/// elements: this test works for 64-bit types, not for 32-bit ones.
BOOST_AUTO_TEST_CASE_TEMPLATE(memory_alignment_simd_false, T, chunk_default_data_type) {
    const int c = T::chunk;
    int total_size_chunk = coreneuron::soa_padded_size(247, 0);
    int ne = 6 * total_size_chunk;

    typename T::value_type* data =
        (typename T::value_type*) coreneuron::ecalloc_align(ne, sizeof(typename T::value_type), 16);

    // Odd chunks: start address must NOT be a multiple of 16.
    for (int i = 1; i < 6; i += 2) {
        bool b = coreneuron::is_aligned((data + i * total_size_chunk), 16);
        BOOST_CHECK_EQUAL(b, 0);
    }

    // Even chunks: start address must be a multiple of 16.
    for (int i = 0; i < 6; i += 2) {
        bool b = coreneuron::is_aligned((data + i * total_size_chunk), 16);
        BOOST_CHECK_EQUAL(b, 1);
    }

    free_memory(data);
}

/// Same layout as above, but for the types in `chunk_data_type` every one of
/// the six chunk starts is expected to be 16-byte aligned.
BOOST_AUTO_TEST_CASE_TEMPLATE(memory_alignment_simd_true, T, chunk_data_type) {
    const int c = T::chunk;
    int total_size_chunk = coreneuron::soa_padded_size(247, 0);
    int ne = 6 * total_size_chunk;

    typename T::value_type* data =
        (typename T::value_type*) coreneuron::ecalloc_align(ne, sizeof(typename T::value_type), 16);

    for (int i = 0; i < 6; ++i) {
        bool b = coreneuron::is_aligned((data + i * total_size_chunk), 16);
        BOOST_CHECK_EQUAL(b, 1);
    }

    free_memory(data);
}

================================================
FILE: tests/unit/cmdline_interface/CMakeLists.txt
================================================
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# ============================================================================= add_executable(cmd_interface_test_bin test_cmdline_interface.cpp) target_link_libraries(cmd_interface_test_bin coreneuron-unit-test) add_test(NAME cmd_interface_test COMMAND $) cpp_cc_configure_sanitizers(TARGET cmd_interface_test_bin TEST cmd_interface_test) ================================================ FILE: tests/unit/cmdline_interface/test_cmdline_interface.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/apps/corenrn_parameters.hpp" #define BOOST_TEST_MODULE cmdline_interface #include #include using namespace coreneuron; BOOST_AUTO_TEST_CASE(cmdline_interface) { const char* argv[] = { "nrniv-core", "--mpi", "--dt", "0.02", "--tstop", "0.1", #ifdef CORENEURON_ENABLE_GPU "--gpu", #endif "--cell-permute", "2", "--nwarp", "8", "-d", "./", "--voltage", "-32", "--threading", "--ms-phases", "1", "--ms-subintervals", "2", "--multisend", "--spkcompress", "32", "--binqueue", "--spikebuf", "100", "--prcellgid", "12", "--forwardskip", "0.02", "--celsius", "25.12", "--mindelay", "0.1", "--dt_io", "0.2"}; constexpr int argc = sizeof argv / sizeof argv[0]; corenrn_parameters corenrn_param_test; corenrn_param_test.parse(argc, const_cast(argv)); // discarding const as CLI11 // interface is not const BOOST_CHECK(corenrn_param_test.seed == -1); // testing default value BOOST_CHECK(corenrn_param_test.spikebuf == 100); BOOST_CHECK(corenrn_param_test.threading == true); BOOST_CHECK(corenrn_param_test.dt == 0.02); BOOST_CHECK(corenrn_param_test.tstop == 0.1); BOOST_CHECK(corenrn_param_test.prcellgid == 12); #ifdef CORENEURON_ENABLE_GPU BOOST_CHECK(corenrn_param_test.gpu == true); #else 
BOOST_CHECK(corenrn_param_test.gpu == false); #endif BOOST_CHECK(corenrn_param_test.dt_io == 0.2); BOOST_CHECK(corenrn_param_test.forwardskip == 0.02); BOOST_CHECK(corenrn_param_test.celsius == 25.12); BOOST_CHECK(corenrn_param_test.mpi_enable == true); BOOST_CHECK(corenrn_param_test.cell_interleave_permute == 2); BOOST_CHECK(corenrn_param_test.voltage == -32); BOOST_CHECK(corenrn_param_test.nwarp == 8); BOOST_CHECK(corenrn_param_test.multisend == true); BOOST_CHECK(corenrn_param_test.mindelay == 0.1); BOOST_CHECK(corenrn_param_test.ms_phases == 1); BOOST_CHECK(corenrn_param_test.ms_subint == 2); BOOST_CHECK(corenrn_param_test.spkcompress == 32); BOOST_CHECK(corenrn_param_test.multisend == true); // Reset all parameters to their default values. corenrn_param_test.reset(); // Should match a default-constructed set of parameters. BOOST_CHECK_EQUAL(corenrn_param_test.voltage, corenrn_parameters{}.voltage); // Everything has its default value, and the first `false` says not to // include default values in the output, so this should be empty BOOST_CHECK(corenrn_param_test.config_to_str(false, false).empty()); } ================================================ FILE: tests/unit/interleave_info/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. 
# ============================================================================= add_executable(interleave_info_bin check_constructors.cpp) target_link_libraries(interleave_info_bin coreneuron-unit-test) add_test(NAME interleave_info_constructor_test COMMAND $) cpp_cc_configure_sanitizers(TARGET interleave_info_bin TEST interleave_info_constructor_test) ================================================ FILE: tests/unit/interleave_info/check_constructors.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/permute/cellorder.hpp" #define BOOST_TEST_MODULE cmdline_interface #include using namespace coreneuron; BOOST_AUTO_TEST_CASE(interleave_info_test) { size_t nwarp = 4; size_t nstride = 6; InterleaveInfo info1; int data1[] = {11, 37, 45, 2, 18, 37, 7, 39, 66, 33}; size_t data2[] = {111, 137, 245, 12, 118, 237, 199, 278, 458}; info1.nwarp = nwarp; info1.nstride = nstride; // to avoid same values, different sub-array is used to initialize different members copy_align_array(info1.stridedispl, data1, nwarp + 1); copy_align_array(info1.stride, data1 + 1, nstride); copy_align_array(info1.firstnode, data1 + 1, nwarp + 1); copy_align_array(info1.lastnode, data1 + 1, nwarp + 1); // check if copy_array works BOOST_CHECK_NE(info1.firstnode, info1.lastnode); BOOST_CHECK_EQUAL_COLLECTIONS(info1.firstnode, info1.firstnode + nwarp + 1, info1.lastnode, info1.lastnode + nwarp + 1); copy_align_array(info1.cellsize, data1 + 4, nwarp); copy_array(info1.nnode, data2, nwarp); copy_array(info1.ncycle, data2 + 1, nwarp); copy_array(info1.idle, data2 + 2, nwarp); copy_array(info1.cache_access, data2 + 3, nwarp); copy_array(info1.child_race, data2 + 4, nwarp); // copy constructor InterleaveInfo info2(info1); // 
assignment operator InterleaveInfo info3; info3 = info1; std::vector infos; infos.push_back(&info2); infos.push_back(&info3); // test few members for (size_t i = 0; i < infos.size(); i++) { BOOST_CHECK_EQUAL(info1.nwarp, infos[i]->nwarp); BOOST_CHECK_EQUAL(info1.nstride, infos[i]->nstride); BOOST_CHECK_EQUAL_COLLECTIONS(info1.stridedispl, info1.stridedispl + nwarp + 1, infos[i]->stridedispl, infos[i]->stridedispl + nwarp + 1); BOOST_CHECK_EQUAL_COLLECTIONS(info1.stride, info1.stride + nstride, infos[i]->stride, infos[i]->stride + nstride); BOOST_CHECK_EQUAL_COLLECTIONS(info1.cellsize, info1.cellsize + nwarp, infos[i]->cellsize, infos[i]->cellsize + nwarp); BOOST_CHECK_EQUAL_COLLECTIONS(info1.child_race, info1.child_race + nwarp, infos[i]->child_race, infos[i]->child_race + nwarp); } } ================================================ FILE: tests/unit/lfp/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= add_executable(lfp_test_bin lfp.cpp) target_link_libraries(lfp_test_bin coreneuron-unit-test) add_test(NAME lfp_test COMMAND $) cpp_cc_configure_sanitizers(TARGET lfp_test_bin TEST lfp_test) set_property( TEST lfp_test APPEND PROPERTY ENVIRONMENT OMP_NUM_THREADS=1) ================================================ FILE: tests/unit/lfp/lfp.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. 
*/
#include "coreneuron/io/lfp.hpp"
#include "coreneuron/mpi/nrnmpi.h"

#define BOOST_TEST_MODULE LFPTest
#include

#include

using namespace coreneuron;
using namespace coreneuron::lfputils;

/// Midpoint-rule approximation of the integral of `f` over [a, b] using `n`
/// equally wide rectangles.
template double integral(F f, double a, double b, int n) {
    double step = (b - a) / n;  // width of each small rectangle
    double area = 0.0;          // signed area
    for (int i = 0; i < n; i++) {
        area += f(a + (i + 0.5) * step) * step;  // sum up each small rectangle
    }
    return area;
}

/// Three checks on the LFP helpers:
///  1. `line_source_lfp_factor` agrees with a numerical integration of 1/r
///     along the segment, for ten electrode positions on a circle;
///  2. the factor is identical for diametrically opposite positions;
///  3. two `LFPCalculator` objects built from the same geometry agree to
///     within 1% at both electrodes.
/// NOTE(review): the template arguments of the two `LFPCalculator` objects and
/// of the `lfp` calls are missing in this copy; the result variable names
/// suggest a line-source and a point-source variant -- confirm.
BOOST_AUTO_TEST_CASE(LFP_PointSource_LineSource) {
#if NRNMPI
    nrnmpi_init(nullptr, nullptr, false);
#endif
    // A single segment of length 1e-6 along z, starting at z = 1e-6.
    double segment_length{1.0e-6};
    double segment_start_val{1.0e-6};
    std::array segment_start = std::array{0.0, 0.0, segment_start_val};
    // presumably paxpy(x, a, y) evaluates x + a * y -- TODO confirm in lfp.hpp
    std::array segment_end =
        paxpy(segment_start, 1.0, std::array{0.0, 0.0, segment_length});
    // Lower bound applied to distances (see std::max in the integrand below).
    double floor{1.0e-6};
    // `pi` is not declared in this file; presumably a global made visible by
    // the included headers.
    pi = 3.141592653589;

    std::array vals;
    double circling_radius{1.0e-6};
    std::array segment_middle{0.0, 0.0, 1.5e-6};
    double medium_resistivity_fac{1.0};

    for (auto k = 0; k < 10; k++) {
        // Electrode moving towards the segment middle along y: 1e-5 down to 1e-6.
        std::array approaching_elec =
            paxpy(segment_middle, 1.0, std::array{0.0, 1.0e-5 - k * 1.0e-6, 0.0});
        // Electrode on a circle of radius `circling_radius` in the y-z plane
        // around the segment middle, at angle 2*pi*k/10.
        std::array circling_elec =
            paxpy(segment_middle,
                  1.0,
                  std::array{0.0,
                             circling_radius * std::cos(2.0 * pi * k / 10),
                             circling_radius * std::sin(2.0 * pi * k / 10)});

        double analytic_approaching_lfp = line_source_lfp_factor(
            approaching_elec, segment_start, segment_end, floor, medium_resistivity_fac);
        double analytic_circling_lfp = line_source_lfp_factor(
            circling_elec, segment_start, segment_end, floor, medium_resistivity_fac);
        // Numerical reference: integrate 1 / max(floor, distance) while the
        // parameter x sweeps a point along the segment.
        double numeric_circling_lfp = integral(
            [&](double x) {
                return 1.0 / std::max(floor,
                                      norm(paxpy(circling_elec,
                                                 -1.0,
                                                 paxpy(segment_end,
                                                       x,
                                                       paxpy(segment_start, -1.0, segment_end)))));
            },
            0.0,
            1.0,
            10000);
        // TEST of analytic vs numerical integration
        std::clog << "ANALYTIC line source " << analytic_circling_lfp
                  << " vs NUMERIC line source LFP " << numeric_circling_lfp << "\n";
        BOOST_REQUIRE_CLOSE(analytic_circling_lfp, numeric_circling_lfp, 1.0e-6);
        // TEST of LFP Flooring
        // NOTE(review): approaching_elec[1] only takes the values 1e-5 ... 1e-6
        // for k in 0..9, so the condition `< 0.866e-6` is never true here and
        // this requirement currently cannot fail.
        BOOST_REQUIRE((approaching_elec[1] < 0.866e-6) ? analytic_approaching_lfp == 1.0e6 : true);
        vals[k] = analytic_circling_lfp;
    }
    // TEST of SYMMETRY of LFP FORMULA
    // vals[k] and vals[k + 5] belong to angles that differ by pi; their
    // relative difference must stay below 1e-12.
    for (size_t k = 0; k < 5; k++) {
        BOOST_REQUIRE(std::abs((vals[k] - vals[k + 5]) /
                               std::max(std::abs(vals[k]), std::abs(vals[k + 5]))) < 1.0e-12);
    }

    // Four segments along z and two electrodes, shared by both calculators.
    std::vector> segments_starts = {{0., 0., 1.},
                                    {0., 0., 0.5},
                                    {0.0, 0.0, 0.0},
                                    {0.0, 0.0, -0.5}};
    std::vector> segments_ends = {{0., 0., 0.},
                                  {0., 0., 1.},
                                  {0., 0., 0.5},
                                  {0.0, 0.0, 0.0}};
    std::vector radii{0.1, 0.1, 0.1, 0.1};
    std::vector> electrodes = {{0.0, 0.3, 0.0}, {0.0, 0.7, 0.8}};
    std::vector indices = {0, 1, 2, 3};
    LFPCalculator lfp(segments_starts, segments_ends, radii, indices, electrodes, 1.0);
    lfp.template lfp>({0.0, 1.0, 2.0, 3.0});
    std::vector res_line_source = lfp.lfp_values();
    LFPCalculator lfpp(
        segments_starts, segments_ends, radii, indices, electrodes, 1.0);
    lfpp.template lfp>({0.0, 1.0, 2.0, 3.0});
    std::vector res_point_source = lfpp.lfp_values();
    // The two results must agree to within 1 percent at each electrode.
    BOOST_REQUIRE_CLOSE(res_line_source[0], res_point_source[0], 1.0);
    BOOST_REQUIRE_CLOSE(res_line_source[1], res_point_source[1], 1.0);
#if NRNMPI
    nrnmpi_finalize();
#endif
}

================================================
FILE: tests/unit/queueing/CMakeLists.txt
================================================
# =============================================================================
# Copyright (c) 2016 - 2022 Blue Brain Project/EPFL
#
# See top-level LICENSE file for details.
# ============================================================================= add_executable(queuing_test_bin test_queueing.cpp) target_link_libraries(queuing_test_bin coreneuron-unit-test) add_test(NAME queuing_test COMMAND $) cpp_cc_configure_sanitizers(TARGET queuing_test_bin TEST queuing_test) ================================================ FILE: tests/unit/queueing/test_queueing.cpp ================================================ /* # ============================================================================= # Copyright (c) 2016 - 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/network/netcvode.hpp" #include "coreneuron/network/tqueue.hpp" #define BOOST_TEST_MODULE QueueingTest #include #include #include #include using namespace coreneuron; // UNIT TESTS BOOST_AUTO_TEST_CASE(priority_queue_nq_dq) { TQueue tq = TQueue(); const int num = 8; int cnter = 0; // enqueue 8 items with increasing time for (int i = 0; i < num; ++i) tq.insert(static_cast(i), NULL); BOOST_CHECK(tq.pq_que_.size() == (num - 1)); // dequeue items with time <= 5.0. Should be 6 events: from 0. to 5. 
TQItem* item = NULL; while ((item = tq.atomic_dq(5.0)) != NULL) { ++cnter; delete item; } BOOST_CHECK(cnter == 6); BOOST_CHECK(tq.pq_que_.size() == (num - 6 - 1)); // dequeue the rest while ((item = tq.atomic_dq(8.0)) != NULL) { ++cnter; delete item; } BOOST_CHECK(cnter == num); BOOST_CHECK(tq.pq_que_.empty()); BOOST_CHECK(tq.least() == NULL); } BOOST_AUTO_TEST_CASE(tqueue_ordered_test) { TQueue tq = TQueue(); const int num = 10; int cnter = 0; double time = double(); // insert N items with time < N for (int i = 0; i < num; ++i) { time = static_cast(rand() % num); tq.insert(time, NULL); } time = 0.0; TQItem* item = NULL; // dequeue all items and check that previous item time <= current item time while ((item = tq.atomic_dq(10.0)) != NULL) { BOOST_CHECK(time <= item->t_); ++cnter; time = item->t_; delete item; } BOOST_CHECK(cnter == num); BOOST_CHECK(tq.pq_que_.empty()); BOOST_CHECK(tq.least() == NULL); } BOOST_AUTO_TEST_CASE(tqueue_move_nolock) {} BOOST_AUTO_TEST_CASE(tqueue_remove) {} BOOST_AUTO_TEST_CASE(threaddata_interthread_send) { NetCvodeThreadData nt{}; const size_t num = 6; for (size_t i = 0; i < num; ++i) nt.interthread_send(static_cast(i), NULL, NULL); BOOST_CHECK(nt.inter_thread_events_.size() == num); } /* BOOST_AUTO_TEST_CASE(threaddata_enqueue){ NetCvode n = NetCvode(); const int num = 6; for(int i = 0; i < num; ++i) n.p[1].interthread_send(static_cast(i), NULL, NULL); BOOST_CHECK(n.p[1].inter_thread_events_.size() == num); //enqueue the inter_thread_events_ n.p[1].enqueue(&n, &(n.p[1])); BOOST_CHECK(n.p[1].inter_thread_events_.empty()); BOOST_CHECK(n.p[1].tqe_->pq_que_.size() == num); //cleanup priority queue TQItem* item = NULL; while((item = n.p[1].tqe_->atomic_dq(6.0)) != NULL) delete item; }*/ ================================================ FILE: tests/unit/solver/CMakeLists.txt ================================================ # ============================================================================= # Copyright (c) 2022 Blue Brain 
Project/EPFL # # See top-level LICENSE file for details. # ============================================================================= add_executable(test-solver test_solver.cpp) target_link_libraries(test-solver coreneuron-unit-test) add_test(NAME test-solver COMMAND $) cpp_cc_configure_sanitizers(TARGET test-solver TEST test-solver) ================================================ FILE: tests/unit/solver/test_solver.cpp ================================================ /* # ============================================================================= # Copyright (c) 2022 Blue Brain Project/EPFL # # See top-level LICENSE file for details. # =============================================================================. */ #include "coreneuron/apps/corenrn_parameters.hpp" #include "coreneuron/gpu/nrn_acc_manager.hpp" #include "coreneuron/permute/cellorder.hpp" #include "coreneuron/permute/node_permute.h" #include "coreneuron/sim/multicore.hpp" #define BOOST_TEST_MODULE CoreNEURON solver #include #include #include #include #include #include #include using namespace coreneuron; namespace utf = boost::unit_test; struct SolverData { std::vector d, rhs; std::vector parent_index; }; constexpr auto magic_index_value = -2; constexpr auto magic_double_value = std::numeric_limits::lowest(); enum struct SolverImplementation { CellPermute0_CPU, CellPermute0_GPU, CellPermute1_CPU, CellPermute1_GPU, CellPermute2_CPU, CellPermute2_GPU, CellPermute2_CUDA }; std::ostream& operator<<(std::ostream& os, SolverImplementation impl) { if (impl == SolverImplementation::CellPermute0_CPU) { return os << "SolverImplementation::CellPermute0_CPU"; } else if (impl == SolverImplementation::CellPermute0_GPU) { return os << "SolverImplementation::CellPermute0_GPU"; } else if (impl == SolverImplementation::CellPermute1_CPU) { return os << "SolverImplementation::CellPermute1_CPU"; } else if (impl == SolverImplementation::CellPermute1_GPU) { return os << "SolverImplementation::CellPermute1_GPU"; } 
else if (impl == SolverImplementation::CellPermute2_CPU) { return os << "SolverImplementation::CellPermute2_CPU"; } else if (impl == SolverImplementation::CellPermute2_GPU) { return os << "SolverImplementation::CellPermute2_GPU"; } else if (impl == SolverImplementation::CellPermute2_CUDA) { return os << "SolverImplementation::CellPermute2_CUDA"; } else { throw std::runtime_error("Invalid SolverImplementation"); } } struct ToyModelConfig { int num_threads{1}; int num_cells{1}; int num_segments_per_cell{3}; std::function produce_a{[](auto, auto) { return 3.14159; }}, produce_b{[](auto, auto) { return 42.0; }}, produce_d{[](auto, auto) { return 7.0; }}, produce_rhs{[](auto, auto) { return -16.0; }}; }; // TODO include some global lock as a sanity check (only one instance of // SetupThreads should exist at any given time) struct SetupThreads { SetupThreads(SolverImplementation impl, ToyModelConfig config = {}) { corenrn_param.cuda_interface = false; corenrn_param.gpu = false; switch (impl) { case SolverImplementation::CellPermute0_GPU: corenrn_param.gpu = true; [[fallthrough]]; case SolverImplementation::CellPermute0_CPU: interleave_permute_type = 0; break; case SolverImplementation::CellPermute1_GPU: corenrn_param.gpu = true; [[fallthrough]]; case SolverImplementation::CellPermute1_CPU: interleave_permute_type = 1; break; case SolverImplementation::CellPermute2_CUDA: corenrn_param.cuda_interface = true; [[fallthrough]]; case SolverImplementation::CellPermute2_GPU: corenrn_param.gpu = true; [[fallthrough]]; case SolverImplementation::CellPermute2_CPU: interleave_permute_type = 2; break; } use_solve_interleave = interleave_permute_type > 0; nrn_threads_create(config.num_threads); create_interleave_info(); int num_cells_remaining{config.num_cells}, total_cells{}; for (auto ithread = 0; ithread < nrn_nthread; ++ithread) { auto& nt = nrn_threads[ithread]; // How many cells to distribute on this thread, trying to get the right // total even if num_threads does not exactly 
divide num_cells. nt.ncell = num_cells_remaining / (nrn_nthread - ithread); total_cells += nt.ncell; num_cells_remaining -= nt.ncell; // How many segments are there in this thread? nt.end = nt.ncell * config.num_segments_per_cell; auto const padded_size = nrn_soa_padded_size(nt.end, 0); // Allocate one big block because the GPU data transfer code assumes this. nt._ndata = padded_size * 4; nt._data = static_cast(emalloc_align(nt._ndata * sizeof(double))); auto* vec_rhs = (nt._actual_rhs = nt._data + 0 * padded_size); auto* vec_d = (nt._actual_d = nt._data + 1 * padded_size); auto* vec_a = (nt._actual_a = nt._data + 2 * padded_size); auto* vec_b = (nt._actual_b = nt._data + 3 * padded_size); auto* parent_indices = (nt._v_parent_index = static_cast(emalloc_align(padded_size * sizeof(int)))); // Magic value to check against later. std::fill(parent_indices, parent_indices + nt.end, magic_index_value); // Put all the root nodes first, then put the other segments // in blocks. i.e. ABCDAAAABBBBCCCCDDDD auto const get_index = [ncell = nt.ncell, nseg = config.num_segments_per_cell](auto icell, auto iseg) { if (iseg == 0) { return icell; } else { return ncell + icell * (nseg - 1) + iseg - 1; } }; for (auto icell = 0; icell < nt.ncell; ++icell) { for (auto iseg = 0; iseg < config.num_segments_per_cell; ++iseg) { auto const global_index = get_index(icell, iseg); vec_a[global_index] = config.produce_a(icell, iseg); vec_b[global_index] = config.produce_b(icell, iseg); vec_d[global_index] = config.produce_d(icell, iseg); vec_rhs[global_index] = config.produce_rhs(icell, iseg); // 0th element is the root node, which has no parent // other elements are attached in a binary tree configuration // | 0 | // | / \ | // | 1 2 | // | / \ / \ | // | 3 4 5 6 | // TODO: include some other topologies, e.g. a long straight line, or // an unbalanced tree. auto const parent_id = iseg ? 
get_index(icell, (iseg - 1) / 2) : -1; parent_indices[global_index] = parent_id; } } // Check we didn't mess up populating any parent indices for (auto i = 0; i < nt.end; ++i) { BOOST_REQUIRE(parent_indices[i] != magic_index_value); // Root nodes should come first for --cell-permute=0 if (i < nt.ncell) { BOOST_REQUIRE(parent_indices[i] == -1); } } if (interleave_permute_type) { nt._permute = interleave_order(nt.id, nt.ncell, nt.end, parent_indices); BOOST_REQUIRE(nt._permute); permute_data(vec_a, nt.end, nt._permute); permute_data(vec_b, nt.end, nt._permute); // This isn't done in CoreNEURON because these are reset every // time step, but permute d/rhs here so that the initial values // set by produce_d and produce_rhs are propagated consistently // to all of the solver implementations. permute_data(vec_d, nt.end, nt._permute); permute_data(vec_rhs, nt.end, nt._permute); // index values change as well as ordering permute_ptr(parent_indices, nt.end, nt._permute); node_permute(parent_indices, nt.end, nt._permute); } } if (impl == SolverImplementation::CellPermute0_GPU) { std::cout << "CellPermute0_GPU is a nonstandard configuration, copying data to the " "device may produce warnings:"; } if (corenrn_param.gpu) { setup_nrnthreads_on_device(nrn_threads, nrn_nthread); } if (impl == SolverImplementation::CellPermute0_GPU) { std::cout << "\n...no more warnings expected" << std::endl; } // Make sure we produced the number of cells we were aiming for BOOST_REQUIRE(total_cells == config.num_cells); BOOST_REQUIRE(num_cells_remaining == 0); } ~SetupThreads() { if (corenrn_param.gpu) { delete_nrnthreads_on_device(nrn_threads, nrn_nthread); } for (auto& nt: *this) { free_memory(std::exchange(nt._data, nullptr)); delete[] std::exchange(nt._permute, nullptr); free_memory(std::exchange(nt._v_parent_index, nullptr)); } destroy_interleave_info(); nrn_threads_free(); } auto dump_solver_data() { std::vector ret{static_cast(nrn_nthread)}; // Sync the solver data from GPU to host 
update_nrnthreads_on_host(nrn_threads, nrn_nthread); // Un-permute the data in and store it in ret.{d,parent_index,rhs} for (auto i = 0; i < nrn_nthread; ++i) { auto& nt = nrn_threads[i]; auto& sd = ret[i]; sd.d.resize(nt.end, magic_double_value); sd.parent_index.resize(nt.end, magic_index_value); sd.rhs.resize(nt.end, magic_double_value); auto* inv_permute = nt._permute ? inverse_permute(nt._permute, nt.end) : nullptr; for (auto i = 0; i < nt.end; ++i) { // index in permuted vectors auto const p_i = nt._permute ? nt._permute[i] : i; // parent index in permuted vectors auto const p_parent = nt._v_parent_index[p_i]; // parent index in unpermuted vectors (i.e. on the same scale as `i`) auto const parent = p_parent == -1 ? -1 : (inv_permute ? inv_permute[p_parent] : p_parent); // Save the values to the de-permuted return structure sd.d[i] = nt._actual_d[p_i]; sd.parent_index[i] = parent; sd.rhs[i] = nt._actual_rhs[p_i]; } delete[] inv_permute; for (auto i = 0; i < nt.end; ++i) { BOOST_REQUIRE(sd.d[i] != magic_double_value); BOOST_REQUIRE(sd.parent_index[i] != magic_index_value); BOOST_REQUIRE(sd.rhs[i] != magic_double_value); } } return ret; } void solve() { for (auto& thread: *this) { nrn_solve_minimal(&thread); } } NrnThread* begin() const { return nrn_threads; } NrnThread* end() const { return nrn_threads + nrn_nthread; } }; template auto solve_and_dump(Args&&... 
args) { SetupThreads threads{std::forward(args)...}; threads.solve(); return threads.dump_solver_data(); } auto active_implementations() { // These are always available std::vector ret{SolverImplementation::CellPermute0_CPU, SolverImplementation::CellPermute1_CPU, SolverImplementation::CellPermute2_CPU}; #ifdef CORENEURON_ENABLE_GPU // Consider making these steerable via a runtime switch in GPU builds ret.push_back(SolverImplementation::CellPermute0_GPU); ret.push_back(SolverImplementation::CellPermute1_GPU); ret.push_back(SolverImplementation::CellPermute2_GPU); ret.push_back(SolverImplementation::CellPermute2_CUDA); #endif return ret; } void compare_solver_data( std::map> const& solver_data) { // CellPermute0_CPU is the simplest version of the solver, it should always // be present and it's a good reference to use constexpr auto ref_impl = SolverImplementation::CellPermute0_CPU; BOOST_REQUIRE(solver_data.find(ref_impl) != solver_data.end()); auto const& ref_data = solver_data.at(ref_impl); for (auto const& [impl, impl_data]: solver_data) { // Must have compatible numbers of threads. BOOST_REQUIRE(impl_data.size() == ref_data.size()); std::cout << "Comparing " << impl << " to " << ref_impl << std::endl; for (auto n_thread = 0ul; n_thread < impl_data.size(); ++n_thread) { // Must have compatible numbers of segments/data entries BOOST_REQUIRE(impl_data[n_thread].d.size() == ref_data[n_thread].d.size()); BOOST_REQUIRE(impl_data[n_thread].parent_index.size() == ref_data[n_thread].parent_index.size()); BOOST_REQUIRE(impl_data[n_thread].rhs.size() == ref_data[n_thread].rhs.size()); BOOST_TEST(impl_data[n_thread].d == ref_data[n_thread].d, boost::test_tools::per_element()); BOOST_TEST(impl_data[n_thread].parent_index == ref_data[n_thread].parent_index, boost::test_tools::per_element()); BOOST_TEST(impl_data[n_thread].rhs == ref_data[n_thread].rhs, boost::test_tools::per_element()); } } } template auto compare_all_active_implementations(Args&&... 
args) { std::map> solver_data; for (auto impl: active_implementations()) { solver_data[impl] = solve_and_dump(impl, std::forward(args)...); } compare_solver_data(solver_data); return solver_data; } // *Roughly* tuned to accomodate NVHPC 22.3 at -O0; the largest differences come // from the pseudorandom seeded tests. constexpr double default_tolerance = 2e-11; // May need to add some different tolerances here BOOST_AUTO_TEST_CASE(SingleCellAndThread, *utf::tolerance(default_tolerance)) { constexpr std::size_t segments = 32; ToyModelConfig config{}; config.num_segments_per_cell = segments; auto const solver_data = compare_all_active_implementations(config); for (auto const& [impl, data]: solver_data) { BOOST_REQUIRE(data.size() == 1); // nthreads BOOST_REQUIRE(data[0].d.size() == segments); BOOST_REQUIRE(data[0].parent_index.size() == segments); BOOST_REQUIRE(data[0].rhs.size() == segments); } } BOOST_AUTO_TEST_CASE(UnbalancedCellSingleThread, *utf::tolerance(default_tolerance)) { ToyModelConfig config{}; config.num_segments_per_cell = 19; // not a nice round number compare_all_active_implementations(config); } BOOST_AUTO_TEST_CASE(LargeCellSingleThread, *utf::tolerance(default_tolerance)) { ToyModelConfig config{}; config.num_segments_per_cell = 4096; compare_all_active_implementations(config); } BOOST_AUTO_TEST_CASE(ManySmallCellsSingleThread, *utf::tolerance(default_tolerance)) { ToyModelConfig config{}; config.num_cells = 1024; compare_all_active_implementations(config); } BOOST_AUTO_TEST_CASE(ManySmallCellsMultiThread, *utf::tolerance(default_tolerance)) { ToyModelConfig config{}; config.num_cells = 1024; config.num_threads = 2; compare_all_active_implementations(config); } auto random_config() { std::mt19937_64 gen{42}; ToyModelConfig config{}; config.produce_a = [g = gen, d = std::normal_distribution{1.0, 0.1}](int icell, int iseg) mutable { return d(g); }; config.produce_b = [g = gen, d = std::normal_distribution{7.0, 0.2}](int, int) mutable { return d(g); }; 
config.produce_d = [g = gen, d = std::normal_distribution{-0.1, 0.01}](int, int) mutable { return d(g); }; config.produce_rhs = [g = gen, d = std::normal_distribution{-15.0, 2.0}](int, int) mutable { return d(g); }; return config; } BOOST_AUTO_TEST_CASE(LargeCellSingleThreadRandom, *utf::tolerance(default_tolerance)) { auto config = random_config(); config.num_segments_per_cell = 4096; compare_all_active_implementations(config); } BOOST_AUTO_TEST_CASE(ManySmallCellsSingleThreadRandom, *utf::tolerance(default_tolerance)) { auto config = random_config(); config.num_cells = 1024; compare_all_active_implementations(config); }