Repository: choderalab/espaloma Branch: main Commit: 413eb5507403 Files: 297 Total size: 652.0 KB Directory structure: gitextract_8sr_6d4i/ ├── .codecov.yml ├── .gitattributes ├── .github/ │ └── workflows/ │ ├── CI.yaml │ ├── clean_cache.yaml │ └── docker.yaml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── devtools/ │ ├── README.md │ ├── conda-envs/ │ │ └── espaloma.yaml │ ├── conda-recipe/ │ │ ├── build.sh │ │ └── meta.yml │ ├── gh-actions/ │ │ └── initialize_conda.sh │ └── scripts/ │ └── create_conda_env.py ├── docker/ │ └── Dockerfile ├── docs/ │ ├── Makefile │ ├── README.md │ ├── _static/ │ │ └── README.md │ ├── _templates/ │ │ ├── README.md │ │ ├── custom-class-template.rst │ │ └── custom-module-template.rst │ ├── api.rst │ ├── autosummary/ │ │ ├── espaloma.data.collection.alkethoh.rst │ │ ├── espaloma.data.collection.esol.rst │ │ ├── espaloma.data.collection.md17_new.rst │ │ ├── espaloma.data.collection.md17_old.rst │ │ ├── espaloma.data.collection.qca.rst │ │ ├── espaloma.data.collection.rst │ │ ├── espaloma.data.collection.zinc.rst │ │ ├── espaloma.data.dataset.Dataset.rst │ │ ├── espaloma.data.dataset.GraphDataset.rst │ │ ├── espaloma.data.dataset.rst │ │ ├── espaloma.data.md.MoleculeVacuumSimulation.rst │ │ ├── espaloma.data.md.rst │ │ ├── espaloma.data.md.subtract_nonbonded_force.rst │ │ ├── espaloma.data.md.subtract_nonbonded_force_except_14.rst │ │ ├── espaloma.data.md17_utils.get_molecule.rst │ │ ├── espaloma.data.md17_utils.realize_molecule.rst │ │ ├── espaloma.data.md17_utils.rst │ │ ├── espaloma.data.md17_utils.sum_offsets.rst │ │ ├── espaloma.data.normalize.BaseNormalize.rst │ │ ├── espaloma.data.normalize.DatasetLogNormalNormalize.rst │ │ ├── espaloma.data.normalize.DatasetNormalNormalize.rst │ │ ├── espaloma.data.normalize.ESOL100LogNormalNormalize.rst │ │ ├── espaloma.data.normalize.ESOL100NormalNormalize.rst │ │ ├── espaloma.data.normalize.NotNormalize.rst │ │ ├── espaloma.data.normalize.PositiveNotNormalize.rst │ │ ├── espaloma.data.normalize.rst │ │ ├── espaloma.data.qcarchive_utils.MolWithTargets.rst │ │ ├── espaloma.data.qcarchive_utils.breakdown_along_time_axis.rst │ │ ├── espaloma.data.qcarchive_utils.fetch_td_record.rst │ │ ├── espaloma.data.qcarchive_utils.get_client.rst │ │ ├── espaloma.data.qcarchive_utils.get_collection.rst │ │ ├── espaloma.data.qcarchive_utils.get_energy_and_gradient.rst │ │ ├── espaloma.data.qcarchive_utils.get_graph.rst │ │ ├── espaloma.data.qcarchive_utils.h5_to_dataset.rst │ │ ├── espaloma.data.qcarchive_utils.make_batch_size_consistent.rst │ │ ├── espaloma.data.qcarchive_utils.rst │ │ ├── espaloma.data.qcarchive_utils.weight_by_snapshots.rst │ │ ├── espaloma.data.rst │ │ ├── espaloma.data.utils.batch.rst │ │ ├── espaloma.data.utils.collate_fn.rst │ │ ├── espaloma.data.utils.from_csv.rst │ │ ├── espaloma.data.utils.infer_mol_from_coordinates.rst │ │ ├── espaloma.data.utils.make_temp_directory.rst │ │ ├── espaloma.data.utils.normalize.rst │ │ ├── espaloma.data.utils.rst │ │ ├── espaloma.data.utils.split.rst │ │ ├── espaloma.data.utils.sum_offsets.rst │ │ ├── espaloma.graphs.deploy.load_forcefield.rst │ │ ├── espaloma.graphs.deploy.openmm_system_from_graph.rst │ │ ├── espaloma.graphs.deploy.rst │ │ ├── espaloma.graphs.graph.BaseGraph.rst │ │ ├── espaloma.graphs.graph.Graph.rst │ │ ├── espaloma.graphs.graph.rst │ │ ├── espaloma.graphs.legacy_force_field.LegacyForceField.rst │ │ ├── espaloma.graphs.legacy_force_field.rst │ │ ├── espaloma.graphs.rst │ │ ├── 
espaloma.graphs.utils.offmol_indices.angle_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.atom_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.bond_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.improper_torsion_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.proper_torsion_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.duplicate_index_ordering.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.from_homogeneous_and_mol.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.relationship_indices_from_offmol.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.fp_oe.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.fp_rdkit.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.from_oemol.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.from_openff_toolkit_mol.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.from_rdkit_mol.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.rst │ │ ├── espaloma.graphs.utils.rst │ │ ├── espaloma.mm.angle.angle_high.rst │ │ ├── espaloma.mm.angle.bond_angle.rst │ │ ├── espaloma.mm.angle.bond_bond.rst │ │ ├── espaloma.mm.angle.harmonic_angle.rst │ │ ├── espaloma.mm.angle.linear_mixture_angle.rst │ │ ├── espaloma.mm.angle.rst │ │ ├── espaloma.mm.angle.urey_bradley.rst │ │ ├── espaloma.mm.bond.bond_high.rst │ │ ├── espaloma.mm.bond.gaussian_bond.rst │ │ ├── espaloma.mm.bond.harmonic_bond.rst │ │ ├── espaloma.mm.bond.linear_mixture_bond.rst │ │ ├── espaloma.mm.bond.rst │ │ ├── espaloma.mm.energy.CarryII.rst │ │ ├── espaloma.mm.energy.EnergyInGraph.rst │ │ ├── espaloma.mm.energy.EnergyInGraphII.rst │ │ ├── espaloma.mm.energy.apply_angle.rst │ │ ├── espaloma.mm.energy.apply_angle_ii.rst │ │ ├── espaloma.mm.energy.apply_angle_linear_mixture.rst │ │ ├── espaloma.mm.energy.apply_bond.rst │ │ ├── espaloma.mm.energy.apply_bond_gaussian.rst │ │ ├── espaloma.mm.energy.apply_bond_ii.rst │ │ ├── espaloma.mm.energy.apply_bond_linear_mixture.rst │ │ ├── espaloma.mm.energy.apply_improper_torsion.rst │ │ ├── espaloma.mm.energy.apply_nonbonded.rst │ │ ├── espaloma.mm.energy.apply_torsion.rst │ │ ├── espaloma.mm.energy.apply_torsion_ii.rst │ │ ├── espaloma.mm.energy.energy_in_graph.rst │ │ ├── espaloma.mm.energy.energy_in_graph_ii.rst │ │ ├── espaloma.mm.energy.rst │ │ ├── espaloma.mm.functional.gaussian.rst │ │ ├── espaloma.mm.functional.harmonic.rst │ │ ├── espaloma.mm.functional.harmonic_harmonic_coupled.rst │ │ ├── espaloma.mm.functional.harmonic_harmonic_periodic_coupled.rst │ │ ├── espaloma.mm.functional.harmonic_periodic_coupled.rst │ │ ├── espaloma.mm.functional.linear_mixture.rst │ │ ├── espaloma.mm.functional.linear_mixture_to_original.rst │ │ ├── espaloma.mm.functional.lj.rst │ │ ├── espaloma.mm.functional.periodic.rst │ │ ├── espaloma.mm.functional.periodic_fixed_phases.rst │ │ ├── espaloma.mm.functional.rst │ │ ├── espaloma.mm.geometry.GeometryInGraph.rst │ │ ├── espaloma.mm.geometry.angle.rst │ │ ├── espaloma.mm.geometry.apply_angle.rst │ │ ├── espaloma.mm.geometry.apply_bond.rst │ │ ├── espaloma.mm.geometry.apply_torsion.rst │ │ ├── espaloma.mm.geometry.copy_src.rst │ │ ├── espaloma.mm.geometry.dihedral.rst │ │ ├── espaloma.mm.geometry.distance.rst │ │ ├── espaloma.mm.geometry.geometry_in_graph.rst │ │ ├── espaloma.mm.geometry.reduce_stack.rst │ │ ├── espaloma.mm.geometry.rst │ │ ├── espaloma.mm.nonbonded.arithmetic_mean.rst │ │ ├── 
espaloma.mm.nonbonded.geometric_mean.rst │ │ ├── espaloma.mm.nonbonded.lj_12_6.rst │ │ ├── espaloma.mm.nonbonded.lj_9_6.rst │ │ ├── espaloma.mm.nonbonded.lorentz_berthelot.rst │ │ ├── espaloma.mm.nonbonded.rst │ │ ├── espaloma.mm.rst │ │ ├── espaloma.mm.torsion.angle_angle.rst │ │ ├── espaloma.mm.torsion.angle_angle_torsion.rst │ │ ├── espaloma.mm.torsion.angle_torsion.rst │ │ ├── espaloma.mm.torsion.bond_torsion.rst │ │ ├── espaloma.mm.torsion.periodic_torsion.rst │ │ ├── espaloma.mm.torsion.rst │ │ ├── espaloma.nn.baselines.FreeParameterBaseline.rst │ │ ├── espaloma.nn.baselines.FreeParameterBaselineInitMean.rst │ │ ├── espaloma.nn.baselines.rst │ │ ├── espaloma.nn.layers.dgl_legacy.GN.rst │ │ ├── espaloma.nn.layers.dgl_legacy.rst │ │ ├── espaloma.nn.layers.rst │ │ ├── espaloma.nn.readout.base_readout.BaseReadout.rst │ │ ├── espaloma.nn.readout.base_readout.rst │ │ ├── espaloma.nn.readout.charge_equilibrium.ChargeEquilibrium.rst │ │ ├── espaloma.nn.readout.charge_equilibrium.get_charges.rst │ │ ├── espaloma.nn.readout.charge_equilibrium.rst │ │ ├── espaloma.nn.readout.graph_level_readout.GraphLevelReadout.rst │ │ ├── espaloma.nn.readout.graph_level_readout.rst │ │ ├── espaloma.nn.readout.janossy.ExpCoefficients.rst │ │ ├── espaloma.nn.readout.janossy.JanossyPooling.rst │ │ ├── espaloma.nn.readout.janossy.JanossyPoolingImproper.rst │ │ ├── espaloma.nn.readout.janossy.JanossyPoolingNonbonded.rst │ │ ├── espaloma.nn.readout.janossy.LinearMixtureToOriginal.rst │ │ ├── espaloma.nn.readout.janossy.rst │ │ ├── espaloma.nn.readout.node_typing.NodeTyping.rst │ │ ├── espaloma.nn.readout.node_typing.rst │ │ ├── espaloma.nn.readout.rst │ │ ├── espaloma.nn.rst │ │ ├── espaloma.nn.sequential.Sequential.rst │ │ └── espaloma.nn.sequential.rst │ ├── conf.py │ ├── deploy.rst │ ├── download_experiments.sh │ ├── experiments/ │ │ ├── index.rst │ │ ├── mm_fitting_small.rst │ │ ├── qm_fitting.rst │ │ └── typing.rst │ ├── index.rst │ ├── install.rst │ ├── make.bat │ └── qm_fitting.rst ├── espaloma/ │ ├── .py │ ├── __init__.py │ ├── _version.py │ ├── app/ │ │ ├── __init__.py │ │ ├── experiment.py │ │ ├── report.py │ │ ├── tests/ │ │ │ └── test_experiment.py │ │ ├── train.py │ │ ├── train_all_params.py │ │ ├── train_bonded_energy.py │ │ └── train_multi_typing.py │ ├── data/ │ │ ├── __init__.py │ │ ├── collection.py │ │ ├── dataset.py │ │ ├── md.py │ │ ├── md17_utils.py │ │ ├── normalize.py │ │ ├── off-mol_0_10_6.json │ │ ├── qcarchive_utils.py │ │ ├── tests/ │ │ │ ├── test_collection.py │ │ │ ├── test_dataset.py │ │ │ ├── test_md.py │ │ │ ├── test_normalize.py │ │ │ ├── test_qcarchive.py │ │ │ └── test_save_and_load.py │ │ └── utils.py │ ├── graphs/ │ │ ├── __init__.py │ │ ├── deploy.py │ │ ├── graph.py │ │ ├── legacy_force_field.py │ │ ├── tests/ │ │ │ ├── test_deploy.py │ │ │ ├── test_gaff_parametrize.py │ │ │ ├── test_graph.py │ │ │ └── test_smirnoff.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── offmol_indices.py │ │ ├── read_heterogeneous_graph.py │ │ ├── read_homogeneous_graph.py │ │ └── regenerate_impropers.py │ ├── metrics.py │ ├── mm/ │ │ ├── __init__.py │ │ ├── angle.py │ │ ├── bond.py │ │ ├── energy.py │ │ ├── functional.py │ │ ├── geometry.py │ │ ├── nonbonded.py │ │ ├── tests/ │ │ │ ├── system.xml │ │ │ ├── test_angle.py │ │ │ ├── test_angle_energy.py │ │ │ ├── test_bond_energy.py │ │ │ ├── test_charge_energy_consistency.py │ │ │ ├── test_charge_energy_consistency_hardcode.py │ │ │ ├── test_dihedral.py │ │ │ ├── test_distance.py │ │ │ ├── test_energy.py │ │ │ ├── test_energy_gaussian.py │ │ │ ├── 
test_energy_ii.py │ │ │ ├── test_geometry.py │ │ │ ├── test_linear_combination.py │ │ │ ├── test_openmm_consistency.py │ │ │ └── test_recoverability.py │ │ └── torsion.py │ ├── nn/ │ │ ├── __init__.py │ │ ├── baselines.py │ │ ├── layers/ │ │ │ ├── __init__.py │ │ │ └── dgl_legacy.py │ │ ├── readout/ │ │ │ ├── __init__.py │ │ │ ├── base_readout.py │ │ │ ├── charge_equilibrium.py │ │ │ ├── graph_level_readout.py │ │ │ ├── janossy.py │ │ │ └── node_typing.py │ │ ├── sequential.py │ │ └── tests/ │ │ ├── test_baseline.py │ │ ├── test_janossy.py │ │ └── test_simple_net.py │ ├── units.py │ └── utils/ │ ├── geometry.py │ ├── model_fetch.py │ └── tests/ │ └── test_model_fetch.py ├── requirements.txt ├── scripts/ │ ├── README.md │ └── perses-benchmark/ │ ├── README.md │ ├── espaloma-perses.export.yaml │ ├── espaloma-perses.yaml │ └── tyk2/ │ ├── README.md │ ├── espaloma-0.2.2/ │ │ ├── LSF-job-template.sh │ │ ├── README.md │ │ ├── benchmark_analysis.py │ │ ├── run_benchmarks.py │ │ └── template.yaml │ └── openff-1.2.0/ │ ├── LSF-job-template.sh │ ├── README.md │ ├── benchmark_analysis.py │ ├── run_benchmarks.py │ └── template.yaml ├── setup.cfg ├── setup.py └── versioneer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .codecov.yml ================================================ # Codecov configuration to make it a bit less noisy coverage: status: patch: false project: default: threshold: 50% comment: layout: "header" require_changes: false branches: null behavior: default flags: null paths: null ================================================ FILE: .gitattributes ================================================ *.ipynb linguist-documentation *.html linguist-documentation espaloma/_version.py export-subst ================================================ FILE: .github/workflows/CI.yaml ================================================ name: CI on: pull_request: branches: - main push: branches: - main schedule: # Nightly tests run on master by default: # Scheduled workflows run on the latest commit on the default or base branch. # (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule) - cron: "0 0 * * *" concurrency: group: "${{ github.workflow }}-${{ github.ref }}" cancel-in-progress: true defaults: run: shell: bash -leo pipefail {0} jobs: test: name: ${{ matrix.os }}, Python ${{ matrix.python-version }} runs-on: ${{ matrix.os }}-latest strategy: fail-fast: false matrix: os: ['ubuntu','macos'] python-version: - "3.12" - "3.11" - "3.10" env: OPENMM: ${{ matrix.cfg.openmm }} OE_LICENSE: ${{ github.workspace }}/oe_license.txt steps: - uses: actions/checkout@v3 - name: Get current date id: date run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}" - uses: mamba-org/setup-micromamba@v1 with: environment-file: devtools/conda-envs/espaloma.yaml cache-environment: true cache-downloads: true cache-environment-key: environment-${{ steps.date.outputs.date }} cache-downloads-key: downloads-${{ steps.date.outputs.date }} create-args: >- python=${{ matrix.python-version }} - name: Additional info about the build shell: bash run: | uname -a df -h ulimit -a - name: Environment Information run: | micromamba info micromamba list micromamba --version - name: Install package run: | python -m pip install --no-deps -e . 
- name: Run tests run: | pytest -v --cov=espaloma --cov-report=xml --color=yes espaloma/ - name: CodeCov uses: codecov/codecov-action@v3 if: ${{ github.repository == 'choderalab/espaloma' && github.event_name == 'pull_request' }} with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml flags: unittests yml: ./.codecov.yml fail_ci_if_error: False verbose: True ================================================ FILE: .github/workflows/clean_cache.yaml ================================================ # from https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries name: cleanup caches by a branch on: pull_request: types: - closed jobs: cleanup: runs-on: ubuntu-latest steps: - name: Check out code uses: actions/checkout@v3 - name: Cleanup run: | gh extension install actions/gh-actions-cache REPO=${{ github.repository }} BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge" echo "Fetching list of cache key" cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 ) ## Setting this to not fail the workflow while deleting cache keys. set +e echo "Deleting caches..." for cacheKey in $cacheKeysForPR do gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm done echo "Done" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/docker.yaml ================================================ # This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. # GitHub recommends pinning actions to a commit SHA. # To get a newer version, you will need to update the SHA. # You can also reference a tag or branch, but the action may change without warning. 
name: Create and publish a Docker image on: workflow_dispatch: defaults: run: shell: bash -l {0} env: REGISTRY: ghcr.io IMAGE_NAME: choderalab/espaloma jobs: build-and-push-image: runs-on: ubuntu-latest permissions: contents: read packages: write steps: - name: Free disk space run: | sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true sudo rm -rf \ /usr/share/dotnet /usr/local/lib/android /opt/ghc \ /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ /usr/lib/jvm || true echo "some directories deleted" sudo apt install aptitude -y >/dev/null 2>&1 sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \ esl-erlang firefox gfortran-8 gfortran-9 google-chrome-stable \ google-cloud-sdk imagemagick \ libmagickcore-dev libmagickwand-dev libmagic-dev ant ant-optional kubectl \ mercurial apt-transport-https mono-complete libmysqlclient \ unixodbc-dev yarn chrpath libssl-dev libxft-dev \ libfreetype6 libfreetype6-dev libfontconfig1 libfontconfig1-dev \ snmp pollinate libpq-dev postgresql-client powershell ruby-full \ sphinxsearch subversion mongodb-org azure-cli microsoft-edge-stable \ -y -f >/dev/null 2>&1 sudo aptitude purge google-cloud-sdk -f -y >/dev/null 2>&1 sudo aptitude purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true sudo apt purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true sudo aptitude purge '~n ^mysql' -f -y >/dev/null 2>&1 sudo aptitude purge '~n ^php' -f -y >/dev/null 2>&1 sudo aptitude purge '~n ^dotnet' -f -y >/dev/null 2>&1 sudo apt-get autoremove -y >/dev/null 2>&1 sudo apt-get autoclean -y >/dev/null 2>&1 echo "some packages purged" - name: Checkout repository uses: actions/checkout@v3 with: fetch-depth: 0 - name: Get Latest Version id: latest-version run: | LATEST_TAG=$(git describe --tags $(git rev-list --tags --max-count=1)) echo $LATEST_TAG echo "LATEST_TAG=$LATEST_TAG" >> $GITHUB_OUTPUT VERSION=$LATEST_TAG echo $VERSION echo "VERSION=$VERSION" >> $GITHUB_OUTPUT - name: Print Latest Version run: echo ${{ steps.latest-version.outputs.VERSION }} # Now that we got the version, we don't need the .git folder - name: Get more space run: | df . -h sudo rm -rf ${GITHUB_WORKSPACE}/.git df . -h - name: Create fully qualified image registry path id: fqirp run: | FQIRP=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }} echo "FQIRP=$FQIRP" >> $GITHUB_OUTPUT - name: Print FQIRP run: echo ${{ steps.fqirp.outputs.FQIRP }} - name: Log in to the Container registry uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | type=schedule,pattern=nightly,enable=true,priority=1000 type=ref,event=branch,enable=true,priority=600 type=ref,event=tag,enable=true,priority=600 type=ref,event=pr,prefix=pr-,enable=true,priority=600 type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{version}} type=sha ${{ steps.latest-version.outputs.VERSION }} - name: Build and export to Docker uses: docker/build-push-action@v4 with: context: . 
file: docker/Dockerfile load: true push: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} build-args: | VERSION=${{ steps.latest-version.outputs.VERSION }} - name: Test image run: | docker run --rm ${{ steps.fqirp.outputs.FQIRP }} python -c "import espaloma; print(espaloma.__version__)" docker run --rm ${{ steps.fqirp.outputs.FQIRP }} pytest --pyargs espaloma -v - name: Push Docker image uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: context: . file: docker/Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} build-args: | VERSION=${{ steps.latest-version.outputs.VERSION }} - name: Setup Apptainer uses: eWaterCycle/setup-apptainer@v2 with: apptainer-version: 1.1.2 - name: Build Apptainer Image run: singularity build espaloma_${{ steps.latest-version.outputs.VERSION }}.sif docker-daemon:${{ steps.fqirp.outputs.FQIRP }} - name: Test & Push Apptainer Image run: | mkdir test_apptainer cd test_apptainer singularity run ../espaloma_${{ steps.latest-version.outputs.VERSION }}.sif pytest --pyargs espaloma -v echo ${{ secrets.GITHUB_TOKEN }} | singularity remote login -u ${{ secrets.GHCR_USERNAME }} --password-stdin oras://ghcr.io singularity push ../espaloma_${{ steps.latest-version.outputs.VERSION }}.sif oras://${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }}-apptainer ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # data *.sdf *.csv # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Parm@Frosst download parm_at_Frosst.tgz # misc .DS_Store ================================================ FILE: .readthedocs.yaml ================================================ version: 2 build: os: "ubuntu-20.04" tools: python: "mambaforge-4.10" sphinx: configuration: docs/conf.py fail_on_warning: false conda: environment: devtools/conda-envs/espaloma.yaml python: # Install our python package before building the docs install: - method: pip path: . ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Yuanqing Wang @ choderalab // MSKCC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: MANIFEST.in ================================================ include LICENSE include MANIFEST.in include versioneer.py graft espaloma global-exclude *.py[cod] __pycache__ *.so ================================================ FILE: README.md ================================================ espaloma: **E**xtensible **S**urrogate **P**otenti**al** **O**ptimized by **M**essage-passing **A**lgorithms 🍹 ============================== [//]: # (Badges) [![CI](https://github.com/choderalab/espaloma/actions/workflows/CI.yaml/badge.svg?branch=main)](https://github.com/choderalab/espaloma/actions/workflows/CI.yaml) [![Documentation Status](https://readthedocs.org/projects/espaloma/badge/?version=latest)](https://espaloma.readthedocs.io/en/latest/?badge=latest) Source code for [Wang Y, Fass J, and Chodera JD "End-to-End Differentiable Construction of Molecular Mechanics Force Fields."](https://arxiv.org/abs/2010.01196) ![abstract](docs/_static/espaloma_abstract_v2-2.png) # Documentation: https://docs.espaloma.org # Paper Abstract Molecular mechanics (MM) potentials have long been a workhorse of computational chemistry. Leveraging accuracy and speed, these functional forms find use in a wide variety of applications in biomolecular modeling and drug discovery, from rapid virtual screening to detailed free energy calculations. 
Traditionally, MM potentials have relied on human-curated, inflexible, and poorly extensible discrete chemical perception rules (_atom types_) for applying parameters to small molecules or biopolymers, making it difficult to optimize both types and parameters to fit quantum chemical or physical property data.
Here, we propose an alternative approach that uses _graph neural networks_ to perceive chemical environments, producing continuous atom embeddings from which valence and nonbonded parameters can be predicted using invariance-preserving layers.
Since all stages are built from smooth neural functions, the entire process---spanning chemical perception to parameter assignment---is modular and end-to-end differentiable with respect to model parameters, allowing new force fields to be easily constructed, extended, and applied to arbitrary molecules.
We show that this approach is not only sufficiently expressive to reproduce legacy atom types, but that it can learn and extend existing molecular mechanics force fields, construct entirely new force fields applicable to both biopolymers and small molecules from quantum chemical calculations, and even learn to accurately predict free energies from experimental observables.

# Installation

We recommend using [`mamba`](https://mamba.readthedocs.io/en/latest/mamba-installation.html#mamba-installation), which is a drop-in replacement for `conda` and is much faster.

```bash
$ mamba create --name espaloma -c conda-forge "espaloma=0.3.2"
```

# Example: Deploy espaloma 0.3.2 pretrained force field to arbitrary MM system

```python
# imports
import os
import torch
import espaloma as esp

# define or load a molecule of interest via the Open Force Field toolkit
from openff.toolkit.topology import Molecule
molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")

# create an Espaloma Graph object to represent the molecule of interest
molecule_graph = esp.Graph(molecule)

# load pretrained model
espaloma_model = esp.get_model("latest")

# apply a trained espaloma model to assign parameters
espaloma_model(molecule_graph.heterograph)

# create an OpenMM System for the specified molecule
openmm_system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph)
```

If using espaloma from a local `.pt` file (for example, `espaloma-0.3.2.pt`), you need to call the model's `eval` method to get correct inference/predictions:

```python
import torch
...
# load local pretrained model
espaloma_model = torch.load("espaloma-0.3.2.pt")
espaloma_model.eval()
...
```

The rest of the code is the same as in the previous example.

# Compatible models

Below is a compatibility matrix showing which versions of the `espaloma` code support which `espaloma` models (the `.pt` files).

| Model 🧪 | DOI 📝 | Supported Espaloma version 💻 | Release Date 🗓️ | Espaloma architecture change 📐? |
|---------------------|-------|------------------------------|----------------|----------------------------------|
| `espaloma-0.3.2.pt` |       | 0.3.1, 0.3.2, 0.4.0          | Sep 22, 2023   | ✅ No                             |
| `espaloma-0.3.1.pt` |       | 0.3.1, 0.3.2, 0.4.0          | Jul 17, 2023   | ⚠️ Yes                           |
| `espaloma-0.3.0.pt` |       | 0.3.0                        | Apr 26, 2023   | ⚠️ Yes                           |

> [!NOTE]
> `espaloma-0.3.1.pt` and `espaloma-0.3.2.pt` are the same model.

# Using espaloma to parameterize small molecules in relative free energy calculations

An example of using espaloma to parameterize small molecules in relative alchemical free energy calculations is provided in the `scripts/perses-benchmark/` directory.
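Before plugging an espaloma-parameterized system into a free energy workflow, it can be useful to sanity-check it by evaluating its potential energy directly. The snippet below is a minimal sketch (not part of the espaloma API) that assumes the deploy example above has already been run, so that `molecule` and `openmm_system` are in scope:

```python
# Minimal sketch: evaluate the potential energy of the espaloma-parameterized
# OpenMM System from the deploy example above. Assumes `molecule` and
# `openmm_system` are already defined as in that example.
import openmm
import openmm.unit as unit
from openff.units.openmm import to_openmm

# generate a single conformer so the molecule has coordinates
molecule.generate_conformers(n_conformers=1)

integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
context = openmm.Context(openmm_system, integrator)
context.setPositions(to_openmm(molecule.conformers[0]))

state = context.getState(getEnergy=True)
print(state.getPotentialEnergy())  # prints a unit-bearing energy, e.g. in kJ/mol
```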
# Manifest

* `espaloma/` core code for graph-parametrized potential energy functions.
    * `graphs/` data objects that contain the various levels of information we need.
        * `graph.py` base modules for graphs.
        * `molecule_graph.py` provides APIs to various molecular modelling toolkits.
        * `homogeneous_graph.py` simplest graph representation of a molecule.
        * `heterogeneous_graph.py` graph representation of a molecule that contains information regarding membership of lower-level nodes to higher-level nodes.
        * `parametrized_graph.py` graph representation of a molecule with all parameters needed for energy evaluation.
    * `nn/` neural network models that facilitate translation between graphs.
        * `dgl_legacy.py` API to DGL models for atom-level message passing.
    * `mm/` molecular mechanics functionalities for energy evaluation.
        * `i/` energy terms used in Class-I force fields.
            * `bond.py` bond energy
            * `angle.py` angle energy
            * `torsion.py` torsion energy
            * `nonbonded.py` nonbonded energy
        * `ii/` energy terms used in Class-II force fields.
            * `coupling.py` coupling terms
            * `polynomial.py` higher-order polynomials.

# License

This software is licensed under the [MIT license](https://opensource.org/licenses/MIT).

# Copyright

Copyright (c) 2020, Chodera Lab at Memorial Sloan Kettering Cancer Center and Authors:

Authors:
- [Yuanqing Wang](http://www.wangyq.net)
- Josh Fass
- John D. Chodera

================================================
FILE: devtools/README.md
================================================
# Development, testing, and deployment tools

This directory contains a collection of tools for running Continuous Integration (CI) tests, conda installation, and other development tools not directly related to the coding process.

## Manifest

### Continuous Integration

You should test your code, but do not feel compelled to use these specific programs. You also may not need Unix and Windows testing if you only plan to deploy on specific platforms. These are just to help you get started.

* `travis-ci`: Linux and OSX based testing through [Travis-CI](https://about.travis-ci.com/)
  * `before_install.sh`: Pip/Miniconda pre-package installation script for Travis
* `appveyor`: Windows based testing through [AppVeyor](https://www.appveyor.com/) (there are no files directly related to this)

### Conda Environment:

This directory contains the files to set up the Conda environment for testing purposes.

* `conda-envs`: directory containing the YAML file(s) which fully describe Conda environments, their dependencies, and their dependency provenances
  * `espaloma.yaml`: Test environment file with base and test dependencies; channels are specified in the file

### Additional Scripts:

This directory contains OS-agnostic helper scripts which don't fall into any of the previous categories.

* `scripts`
  * `create_conda_env.py`: Helper program for spinning up new conda environments based on a starter file, with Python version and environment name command-line options

## How to contribute changes

- Clone the repository if you have write access to the main repo; fork the repository if you are a collaborator without write access.
- Make a new branch with `git checkout -b {your branch name}`
- Make changes and test your code
- Ensure that the test environment dependencies (`conda-envs`) line up with the build and deploy dependencies (`conda-recipe/meta.yml`)
- Push the branch to the repo (either the main repo or your fork) with `git push -u origin {your branch name}`
  * Note that `origin` is the default name assigned to the remote; yours may be different
- Make a PR on GitHub with your changes
- We'll review the changes and get your code into the repo after lively discussion!

## Checklist for updates

- [ ] Make sure there is an/are issue(s) opened for your specific update
- [ ] Create the PR, referencing the issue
- [ ] Debug the PR as needed until tests pass
- [ ] Tag the final, debugged version
  * `git tag -a X.Y.Z [latest pushed commit] && git push --follow-tags`
- [ ] Get the PR merged in

## Versioneer Auto-version

[Versioneer](https://github.com/warner/python-versioneer) will automatically infer what version is installed by looking at the `git` tags and how many commits ahead this version is. The format follows [PEP 440](https://www.python.org/dev/peps/pep-0440/) and matches the regular expression:

```regexp
\d+\.\d+\.\d+(\+\d+-[a-z0-9]+)?
```

If the version of this commit is the same as a `git` tag, the installed version is the same as the tag, e.g. `espaloma-0.1.2`; otherwise it will be appended with `+X`, where `X` is the number of commits ahead of the last tag, and then `-YYYYYY`, where the `Y`'s are replaced with the `git` commit hash. A quick runtime check of this string is sketched after the `build.sh` listing below.

================================================
FILE: devtools/conda-envs/espaloma.yaml
================================================
name: espaloma-test
channels:
  - conda-forge
  - openeye
dependencies:
  # Base dependencies
  - python
  - pip
  # 3rd party
  - openeye-toolkits
  - numpy
  - matplotlib
  - scipy
  - openff-toolkit >=0.12
  - openff-forcefields
  - openff-units
  - smirnoff99frosst >=1.1.0.1 # https://github.com/openforcefield/smirnoff99Frosst/issues/109
  - openmm
  - openmmforcefields >=0.11.2
  - tqdm
  - pydantic <2 # We need our deps to fix this
  - qcportal >=0.50
  - dgl =2.3.0
  - torchdata <=0.10.0
  # Testing
  - pytest
  - pytest-cov
  - pytest-xdist
  - pytest-randomly
  - codecov
  - nose
  - nose-timer
  - coverage
  - sphinx
  - sphinx_rtd_theme

================================================
FILE: devtools/conda-recipe/build.sh
================================================
pip install .
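As referenced in the Versioneer section above, the inferred version string can be checked at runtime. A minimal sketch, assuming `espaloma` is installed in the current environment:

```python
# Print the Versioneer-inferred version string.
# On a tagged commit this looks like e.g. "0.1.2"; between tags it carries
# the "+N-g<hash>"-style local segment described in the Versioneer section.
import espaloma

print(espaloma.__version__)
```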
================================================
FILE: devtools/conda-recipe/meta.yml
================================================
package:
  name: espaloma
  version: !!str 0.0.0

source:
  path: ../../

build:
  preserve_egg_dir: True
  number: 0

requirements:
  build:
    - python
    - setuptools
    - numpy >=1.14
  run:
    - python
    - pip
    - openeye-toolkits
    - numpy
    - matplotlib
    - scipy
    - openff-toolkit
    - openff-forcefields
    - smirnoff99Frosst
    - openmm
    - openmmforcefields
    - pytorch
    - dgl
    - pytest
    - pytest-cov
    - codecov
    - nose
    - nose-timer
    - coverage
    - qcportal
    - torchdata <=0.10.0

about:
  home: https://github.com/choderalab/espaloma
  license: MIT
  license_file: LICENSE

================================================
FILE: devtools/gh-actions/initialize_conda.sh
================================================
case $CI_OS in
    windows*)
        eval "$(${CONDA}/condabin/conda.bat shell.bash hook)"
        ;;
    *)
        eval "$(${CONDA}/condabin/conda shell.bash hook)"
        ;;
esac

================================================
FILE: devtools/scripts/create_conda_env.py
================================================
import argparse
import glob
import os
import re
import shutil
import subprocess as sp
from contextlib import contextmanager
from tempfile import TemporaryDirectory

# YAML imports
try:
    import yaml  # PyYAML
    loader = yaml.load
except ImportError:
    try:
        import ruamel_yaml as yaml  # Ruamel YAML
    except ImportError:
        try:
            # Load Ruamel YAML from the base conda environment
            from importlib import util as import_util
            CONDA_BIN = os.path.dirname(os.environ['CONDA_EXE'])
            ruamel_yaml_path = glob.glob(os.path.join(CONDA_BIN, '..', 'lib', 'python*.*',
                                                      'site-packages', 'ruamel_yaml', '__init__.py'))[0]
            # Based on importlib example, but only needs to load_module since its the whole package, not just
            # a module
            spec = import_util.spec_from_file_location('ruamel_yaml', ruamel_yaml_path)
            yaml = spec.loader.load_module()
        except (KeyError, ImportError, IndexError):
            raise ImportError("No YAML parser could be found in this or the conda environment. "
                              "Could not find PyYAML or Ruamel YAML in the current environment, "
                              "AND could not find Ruamel YAML in the base conda environment through CONDA_EXE path. "
                              "Environment not created!")
    loader = yaml.YAML(typ="safe").load  # typ="safe" avoids odd typing on output


@contextmanager
def temp_cd():
    """Temporary CD Helper"""
    cwd = os.getcwd()
    with TemporaryDirectory() as td:
        try:
            os.chdir(td)
            yield
        finally:
            os.chdir(cwd)


# Args
parser = argparse.ArgumentParser(description='Creates a conda environment from file for a given Python version.')
parser.add_argument('-n', '--name', type=str, help='The name of the created Python environment')
parser.add_argument('-p', '--python', type=str, help='The version of the created Python environment')
parser.add_argument('conda_file', help='The file for the created Python environment')
args = parser.parse_args()

# Open the base file
with open(args.conda_file, "r") as handle:
    yaml_script = loader(handle.read())

python_replacement_string = "python {}*".format(args.python)

try:
    for dep_index, dep_value in enumerate(yaml_script['dependencies']):
        if re.match('python([ ><=*]+[0-9.*]*)?$', dep_value):  # Match explicitly 'python' and its formats
            yaml_script['dependencies'].pop(dep_index)
            break  # Making the assumption there is only one Python entry, also avoids need to enumerate in reverse
except (KeyError, TypeError):
    # Case of no dependencies key, or dependencies: None
    yaml_script['dependencies'] = []
finally:
    # Ensure the python version is added in. Even if the code does not need it, we assume the env does
    yaml_script['dependencies'].insert(0, python_replacement_string)

# Figure out conda path
if "CONDA_EXE" in os.environ:
    conda_path = os.environ["CONDA_EXE"]
else:
    conda_path = shutil.which("conda")
if conda_path is None:
    raise RuntimeError("Could not find a conda binary in CONDA_EXE variable or in executable search path")

print("CONDA ENV NAME  {}".format(args.name))
print("PYTHON VERSION  {}".format(args.python))
print("CONDA FILE NAME {}".format(args.conda_file))
print("CONDA PATH      {}".format(conda_path))

# Write to a temp directory which will always be cleaned up
with temp_cd():
    temp_file_name = "temp_script.yaml"
    with open(temp_file_name, 'w') as f:
        f.write(yaml.dump(yaml_script))
    sp.call("{} env create -n {} -f {}".format(conda_path, args.name, temp_file_name), shell=True)

================================================
FILE: docker/Dockerfile
================================================
FROM mambaorg/micromamba:1.4.9

LABEL org.opencontainers.image.source=https://github.com/choderalab/espaloma
LABEL org.opencontainers.image.description="Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm"
LABEL org.opencontainers.image.licenses=MIT

# Espaloma version we want to build
ARG VERSION

# Don't buffer stdout & stderr streams, so if there is a crash no partial buffer output is lost
# https://docs.python.org/3/using/cmdline.html#cmdoption-u
ENV PYTHONUNBUFFERED=1

RUN micromamba install -y -n base -c conda-forge -c dglteam pytest "dgl<1" git "espaloma==$VERSION" && \
    micromamba clean --all --yes

# Ensure that conda environment is automatically activated
# https://github.com/mamba-org/micromamba-docker#running-commands-in-dockerfile-within-the-conda-environment
ARG MAMBA_DOCKERFILE_ACTIVATE=1

================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = espaloma
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/README.md
================================================
# Compiling espaloma's Documentation

The docs for this project are built with [Sphinx](http://www.sphinx-doc.org/en/master/).
To compile the docs, first ensure that Sphinx and the ReadTheDocs theme are installed.

```bash
conda install sphinx sphinx_rtd_theme
```

Once installed, you can use the `Makefile` in this directory to compile static HTML pages:

```bash
make html
```

The compiled docs will be in the `_build` directory and can be viewed by opening `index.html` (which may itself be inside a directory called `html/`, depending on what version of Sphinx is installed).

================================================
FILE: docs/_static/README.md
================================================
# Static Doc Directory

Add any paths that contain custom static files (such as style sheets) here, relative to the `conf.py` file's directory.
They are copied after the builtin static files, so a file named "default.css" will overwrite the builtin "default.css".

The path to this folder is set in the Sphinx `conf.py` file in the line:

```python
html_static_path = ['_static']
```

## Examples of file to add to this directory

* Custom Cascading Style Sheets
* Custom JavaScript code
* Static logo images

================================================
FILE: docs/_templates/README.md
================================================
# Templates Doc Directory

Add any paths that contain templates here, relative to the `conf.py` file's directory.
They are copied after the builtin template files, so a file named "page.html" will overwrite the builtin "page.html".

The path to this folder is set in the Sphinx `conf.py` file in the line:

```python
templates_path = ['_templates']
```

## Examples of file to add to this directory

* HTML extensions of stock pages like `page.html` or `layout.html`

================================================
FILE: docs/_templates/custom-class-template.rst
================================================
{{ fullname | escape | underline}}

.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}
   :members:
   :show-inheritance:
   :inherited-members:

   {% block methods %}
   .. automethod:: __init__

   {% if methods %}
   .. rubric:: {{ _('Methods') }}

   .. autosummary::
   {% for item in methods %}
      ~{{ name }}.{{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block attributes %}
   {% if attributes %}
   .. rubric:: {{ _('Attributes') }}

   .. autosummary::
   {% for item in attributes %}
      ~{{ name }}.{{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

================================================
FILE: docs/_templates/custom-module-template.rst
================================================
{{ fullname | escape | underline}}

.. automodule:: {{ fullname }}

   {% block attributes %}
   {% if attributes %}
   .. rubric:: Module Attributes

   .. autosummary::
      :toctree:
   {% for item in attributes %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block functions %}
   {% if functions %}
   .. rubric:: {{ _('Functions') }}

   .. autosummary::
      :toctree:
   {% for item in functions %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block classes %}
   {% if classes %}
   .. rubric:: {{ _('Classes') }}

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst
   {% for item in classes %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block exceptions %}
   {% if exceptions %}
   .. rubric:: {{ _('Exceptions') }}

   .. autosummary::
      :toctree:
   {% for item in exceptions %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

{% block modules %}
{% if modules %}
.. rubric:: Modules

.. autosummary::
   :toctree:
   :template: custom-module-template.rst
   :recursive:
{% for item in modules %}
   {{ item }}
{%- endfor %}
{% endif %}
{% endblock %}

================================================
FILE: docs/api.rst
================================================
API Documentation
=================

.. autosummary::
   :toctree: autosummary
   :template: custom-module-template.rst
   :recursive:

   espaloma.mm
   espaloma.nn
   espaloma.graphs
   espaloma.data

================================================
FILE: docs/autosummary/espaloma.data.collection.alkethoh.rst
================================================
espaloma.data.collection.alkethoh
=================================

.. currentmodule:: espaloma.data.collection

..
autofunction:: alkethoh ================================================ FILE: docs/autosummary/espaloma.data.collection.esol.rst ================================================ espaloma.data.collection.esol ============================= .. currentmodule:: espaloma.data.collection .. autofunction:: esol ================================================ FILE: docs/autosummary/espaloma.data.collection.md17_new.rst ================================================ espaloma.data.collection.md17\_new ================================== .. currentmodule:: espaloma.data.collection .. autofunction:: md17_new ================================================ FILE: docs/autosummary/espaloma.data.collection.md17_old.rst ================================================ espaloma.data.collection.md17\_old ================================== .. currentmodule:: espaloma.data.collection .. autofunction:: md17_old ================================================ FILE: docs/autosummary/espaloma.data.collection.qca.rst ================================================ espaloma.data.collection.qca ============================ .. currentmodule:: espaloma.data.collection .. autoclass:: qca :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~qca.__init__ ~qca.bayer ~qca.benchmark ~qca.coverage ~qca.emolecules ~qca.fda ~qca.pfizer ~qca.roche ================================================ FILE: docs/autosummary/espaloma.data.collection.rst ================================================ espaloma.data.collection ======================== .. automodule:: espaloma.data.collection .. rubric:: Functions .. autosummary:: :toctree: alkethoh esol md17_new md17_old zinc .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst qca ================================================ FILE: docs/autosummary/espaloma.data.collection.zinc.rst ================================================ espaloma.data.collection.zinc ============================= .. currentmodule:: espaloma.data.collection .. autofunction:: zinc ================================================ FILE: docs/autosummary/espaloma.data.dataset.Dataset.rst ================================================ espaloma.data.dataset.Dataset ============================= .. currentmodule:: espaloma.data.dataset .. autoclass:: Dataset :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~Dataset.__init__ ~Dataset.apply ~Dataset.load ~Dataset.save ~Dataset.shuffle ~Dataset.split ~Dataset.subsample ================================================ FILE: docs/autosummary/espaloma.data.dataset.GraphDataset.rst ================================================ espaloma.data.dataset.GraphDataset ================================== .. currentmodule:: espaloma.data.dataset .. autoclass:: GraphDataset :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~GraphDataset.__init__ ~GraphDataset.apply ~GraphDataset.batch ~GraphDataset.load ~GraphDataset.save ~GraphDataset.shuffle ~GraphDataset.split ~GraphDataset.subsample ~GraphDataset.view ================================================ FILE: docs/autosummary/espaloma.data.dataset.rst ================================================ espaloma.data.dataset ===================== .. automodule:: espaloma.data.dataset .. rubric:: Classes .. 
autosummary:: :toctree: :template: custom-class-template.rst Dataset GraphDataset ================================================ FILE: docs/autosummary/espaloma.data.md.MoleculeVacuumSimulation.rst ================================================ espaloma.data.md.MoleculeVacuumSimulation ========================================= .. currentmodule:: espaloma.data.md .. autoclass:: MoleculeVacuumSimulation :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~MoleculeVacuumSimulation.__init__ ~MoleculeVacuumSimulation.run ~MoleculeVacuumSimulation.simulation_from_graph ================================================ FILE: docs/autosummary/espaloma.data.md.rst ================================================ espaloma.data.md ================ .. automodule:: espaloma.data.md .. rubric:: Functions .. autosummary:: :toctree: subtract_nonbonded_force subtract_nonbonded_force_except_14 .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst MoleculeVacuumSimulation ================================================ FILE: docs/autosummary/espaloma.data.md.subtract_nonbonded_force.rst ================================================ espaloma.data.md.subtract\_nonbonded\_force =========================================== .. currentmodule:: espaloma.data.md .. autofunction:: subtract_nonbonded_force ================================================ FILE: docs/autosummary/espaloma.data.md.subtract_nonbonded_force_except_14.rst ================================================ espaloma.data.md.subtract\_nonbonded\_force\_except\_14 ======================================================= .. currentmodule:: espaloma.data.md .. autofunction:: subtract_nonbonded_force_except_14 ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.get_molecule.rst ================================================ espaloma.data.md17\_utils.get\_molecule ======================================= .. currentmodule:: espaloma.data.md17_utils .. autofunction:: get_molecule ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.realize_molecule.rst ================================================ espaloma.data.md17\_utils.realize\_molecule =========================================== .. currentmodule:: espaloma.data.md17_utils .. autofunction:: realize_molecule ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.rst ================================================ espaloma.data.md17\_utils ========================= .. automodule:: espaloma.data.md17_utils .. rubric:: Functions .. autosummary:: :toctree: get_molecule realize_molecule sum_offsets ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.sum_offsets.rst ================================================ espaloma.data.md17\_utils.sum\_offsets ====================================== .. currentmodule:: espaloma.data.md17_utils .. autofunction:: sum_offsets ================================================ FILE: docs/autosummary/espaloma.data.normalize.BaseNormalize.rst ================================================ espaloma.data.normalize.BaseNormalize ===================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: BaseNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. 
autosummary:: ~BaseNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.DatasetLogNormalNormalize.rst ================================================ espaloma.data.normalize.DatasetLogNormalNormalize ================================================= .. currentmodule:: espaloma.data.normalize .. autoclass:: DatasetLogNormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~DatasetLogNormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.DatasetNormalNormalize.rst ================================================ espaloma.data.normalize.DatasetNormalNormalize ============================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: DatasetNormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~DatasetNormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.ESOL100LogNormalNormalize.rst ================================================ espaloma.data.normalize.ESOL100LogNormalNormalize ================================================= .. currentmodule:: espaloma.data.normalize .. autoclass:: ESOL100LogNormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~ESOL100LogNormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.ESOL100NormalNormalize.rst ================================================ espaloma.data.normalize.ESOL100NormalNormalize ============================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: ESOL100NormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~ESOL100NormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.NotNormalize.rst ================================================ espaloma.data.normalize.NotNormalize ==================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: NotNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~NotNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.PositiveNotNormalize.rst ================================================ espaloma.data.normalize.PositiveNotNormalize ============================================ .. currentmodule:: espaloma.data.normalize .. autoclass:: PositiveNotNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~PositiveNotNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.rst ================================================ espaloma.data.normalize ======================= .. automodule:: espaloma.data.normalize .. rubric:: Classes .. 
autosummary:: :toctree: :template: custom-class-template.rst BaseNormalize DatasetLogNormalNormalize DatasetNormalNormalize ESOL100LogNormalNormalize ESOL100NormalNormalize NotNormalize PositiveNotNormalize ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.MolWithTargets.rst ================================================ espaloma.data.qcarchive\_utils.MolWithTargets ============================================= .. currentmodule:: espaloma.data.qcarchive_utils .. autoclass:: MolWithTargets :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~MolWithTargets.__init__ ~MolWithTargets.count ~MolWithTargets.index .. rubric:: Attributes .. autosummary:: ~MolWithTargets.energies ~MolWithTargets.gradients ~MolWithTargets.offmol ~MolWithTargets.xyz ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.breakdown_along_time_axis.rst ================================================ espaloma.data.qcarchive\_utils.breakdown\_along\_time\_axis =========================================================== .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: breakdown_along_time_axis ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.fetch_td_record.rst ================================================ espaloma.data.qcarchive\_utils.fetch\_td\_record ================================================ .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: fetch_td_record ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_client.rst ================================================ espaloma.data.qcarchive\_utils.get\_client ========================================== .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_client ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_collection.rst ================================================ espaloma.data.qcarchive\_utils.get\_collection ============================================== .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_collection ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_energy_and_gradient.rst ================================================ espaloma.data.qcarchive\_utils.get\_energy\_and\_gradient ========================================================= .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_energy_and_gradient ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_graph.rst ================================================ espaloma.data.qcarchive\_utils.get\_graph ========================================= .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_graph ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.h5_to_dataset.rst ================================================ espaloma.data.qcarchive\_utils.h5\_to\_dataset ============================================== .. currentmodule:: espaloma.data.qcarchive_utils .. 
================================================
FILE: docs/autosummary/espaloma.data.qcarchive_utils.make_batch_size_consistent.rst
================================================
espaloma.data.qcarchive\_utils.make\_batch\_size\_consistent
============================================================

.. currentmodule:: espaloma.data.qcarchive_utils

.. autofunction:: make_batch_size_consistent

================================================
FILE: docs/autosummary/espaloma.data.qcarchive_utils.rst
================================================
espaloma.data.qcarchive\_utils
==============================

.. automodule:: espaloma.data.qcarchive_utils

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      breakdown_along_time_axis
      fetch_td_record
      get_client
      get_collection
      get_energy_and_gradient
      get_graph
      h5_to_dataset
      make_batch_size_consistent
      weight_by_snapshots

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      MolWithTargets

================================================
FILE: docs/autosummary/espaloma.data.qcarchive_utils.weight_by_snapshots.rst
================================================
espaloma.data.qcarchive\_utils.weight\_by\_snapshots
====================================================

.. currentmodule:: espaloma.data.qcarchive_utils

.. autofunction:: weight_by_snapshots

================================================
FILE: docs/autosummary/espaloma.data.rst
================================================
espaloma.data
=============

.. automodule:: espaloma.data

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.data.collection
      espaloma.data.dataset
      espaloma.data.md
      espaloma.data.md17_utils
      espaloma.data.normalize
      espaloma.data.qcarchive_utils
      espaloma.data.utils

================================================
FILE: docs/autosummary/espaloma.data.utils.batch.rst
================================================
espaloma.data.utils.batch
=========================

.. currentmodule:: espaloma.data.utils

.. autofunction:: batch

================================================
FILE: docs/autosummary/espaloma.data.utils.collate_fn.rst
================================================
espaloma.data.utils.collate\_fn
===============================

.. currentmodule:: espaloma.data.utils

.. autofunction:: collate_fn

================================================
FILE: docs/autosummary/espaloma.data.utils.from_csv.rst
================================================
espaloma.data.utils.from\_csv
=============================

.. currentmodule:: espaloma.data.utils

.. autofunction:: from_csv

================================================
FILE: docs/autosummary/espaloma.data.utils.infer_mol_from_coordinates.rst
================================================
espaloma.data.utils.infer\_mol\_from\_coordinates
=================================================

.. currentmodule:: espaloma.data.utils

.. autofunction:: infer_mol_from_coordinates

================================================
FILE: docs/autosummary/espaloma.data.utils.make_temp_directory.rst
================================================
espaloma.data.utils.make\_temp\_directory
=========================================

.. currentmodule:: espaloma.data.utils

.. autofunction:: make_temp_directory
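
A short usage sketch, assuming ``make_temp_directory`` is a context manager
that yields a scratch directory path and removes it on exit; verify this
against the docstring rendered above before relying on it:

.. code-block:: python

   from espaloma.data.utils import make_temp_directory

   with make_temp_directory() as tmp_dir:
       # write intermediate files into tmp_dir; it is cleaned up on exit
       print("scratch space at", tmp_dir)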
================================================
FILE: docs/autosummary/espaloma.data.utils.normalize.rst
================================================
espaloma.data.utils.normalize
=============================

.. currentmodule:: espaloma.data.utils

.. autofunction:: normalize

================================================
FILE: docs/autosummary/espaloma.data.utils.rst
================================================
espaloma.data.utils
===================

.. automodule:: espaloma.data.utils

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      batch
      collate_fn
      from_csv
      infer_mol_from_coordinates
      make_temp_directory
      normalize
      split
      sum_offsets

================================================
FILE: docs/autosummary/espaloma.data.utils.split.rst
================================================
espaloma.data.utils.split
=========================

.. currentmodule:: espaloma.data.utils

.. autofunction:: split

================================================
FILE: docs/autosummary/espaloma.data.utils.sum_offsets.rst
================================================
espaloma.data.utils.sum\_offsets
================================

.. currentmodule:: espaloma.data.utils

.. autofunction:: sum_offsets

================================================
FILE: docs/autosummary/espaloma.graphs.deploy.load_forcefield.rst
================================================
espaloma.graphs.deploy.load\_forcefield
=======================================

.. currentmodule:: espaloma.graphs.deploy

.. autofunction:: load_forcefield

================================================
FILE: docs/autosummary/espaloma.graphs.deploy.openmm_system_from_graph.rst
================================================
espaloma.graphs.deploy.openmm\_system\_from\_graph
==================================================

.. currentmodule:: espaloma.graphs.deploy

.. autofunction:: openmm_system_from_graph

================================================
FILE: docs/autosummary/espaloma.graphs.deploy.rst
================================================
espaloma.graphs.deploy
======================

.. automodule:: espaloma.graphs.deploy

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      load_forcefield
      openmm_system_from_graph

================================================
FILE: docs/autosummary/espaloma.graphs.graph.BaseGraph.rst
================================================
espaloma.graphs.graph.BaseGraph
===============================

.. currentmodule:: espaloma.graphs.graph

.. autoclass:: BaseGraph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~BaseGraph.__init__

================================================
FILE: docs/autosummary/espaloma.graphs.graph.Graph.rst
================================================
espaloma.graphs.graph.Graph
===========================

.. currentmodule:: espaloma.graphs.graph

.. autoclass:: Graph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~Graph.__init__
      ~Graph.get_heterograph_from_graph_and_mol
      ~Graph.get_homograph_from_mol
      ~Graph.load
      ~Graph.save

   .. rubric:: Attributes

   .. autosummary::

      ~Graph.edata
      ~Graph.ndata
      ~Graph.nodes

================================================
FILE: docs/autosummary/espaloma.graphs.graph.rst
================================================
espaloma.graphs.graph
=====================

.. automodule:: espaloma.graphs.graph

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      BaseGraph
      Graph
================================================
FILE: docs/autosummary/espaloma.graphs.legacy_force_field.LegacyForceField.rst
================================================
espaloma.graphs.legacy\_force\_field.LegacyForceField
=====================================================

.. currentmodule:: espaloma.graphs.legacy_force_field

.. autoclass:: LegacyForceField
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~LegacyForceField.__init__
      ~LegacyForceField.baseline_energy
      ~LegacyForceField.multi_typing
      ~LegacyForceField.parametrize
      ~LegacyForceField.typing

================================================
FILE: docs/autosummary/espaloma.graphs.legacy_force_field.rst
================================================
espaloma.graphs.legacy\_force\_field
====================================

.. automodule:: espaloma.graphs.legacy_force_field

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      LegacyForceField

================================================
FILE: docs/autosummary/espaloma.graphs.rst
================================================
espaloma.graphs
===============

.. automodule:: espaloma.graphs

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.graphs.deploy
      espaloma.graphs.graph
      espaloma.graphs.legacy_force_field
      espaloma.graphs.utils

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.angle_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.angle\_indices
====================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: angle_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.atom_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.atom\_indices
===================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: atom_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.bond_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.bond\_indices
===================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: bond_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.improper_torsion_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.improper\_torsion\_indices
================================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: improper_torsion_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.proper_torsion_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.proper\_torsion\_indices
==============================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: proper_torsion_indices
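
A short sketch of how these index helpers are typically used, assuming each
takes an OpenFF ``Molecule`` and returns an integer array with one row per
bond, angle, or torsion (verify the shapes against the docstrings above):

.. code-block:: python

   from openff.toolkit.topology import Molecule
   from espaloma.graphs.utils import offmol_indices

   offmol = Molecule.from_smiles("CCO")  # ethanol as a small example

   bonds = offmol_indices.bond_indices(offmol)              # (n_bonds, 2)
   angles = offmol_indices.angle_indices(offmol)            # (n_angles, 3)
   propers = offmol_indices.proper_torsion_indices(offmol)  # (n_propers, 4)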
================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices
=====================================

.. automodule:: espaloma.graphs.utils.offmol_indices

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle_indices
      atom_indices
      bond_indices
      improper_torsion_indices
      proper_torsion_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.duplicate_index_ordering.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph.duplicate\_index\_ordering
===========================================================================

.. currentmodule:: espaloma.graphs.utils.read_heterogeneous_graph

.. autofunction:: duplicate_index_ordering

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.from_homogeneous_and_mol.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph.from\_homogeneous\_and\_mol
============================================================================

.. currentmodule:: espaloma.graphs.utils.read_heterogeneous_graph

.. autofunction:: from_homogeneous_and_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.relationship_indices_from_offmol.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph.relationship\_indices\_from\_offmol
====================================================================================

.. currentmodule:: espaloma.graphs.utils.read_heterogeneous_graph

.. autofunction:: relationship_indices_from_offmol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph
================================================

.. automodule:: espaloma.graphs.utils.read_heterogeneous_graph

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      duplicate_index_ordering
      from_homogeneous_and_mol
      relationship_indices_from_offmol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.fp_oe.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.fp\_oe
=====================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: fp_oe

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.fp_rdkit.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.fp\_rdkit
========================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: fp_rdkit

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.from_oemol.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.from\_oemol
==========================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: from_oemol
================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.from_openff_toolkit_mol.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.from\_openff\_toolkit\_mol
=========================================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: from_openff_toolkit_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.from_rdkit_mol.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.from\_rdkit\_mol
===============================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: from_rdkit_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph
==============================================

.. automodule:: espaloma.graphs.utils.read_homogeneous_graph

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      fp_oe
      fp_rdkit
      from_oemol
      from_openff_toolkit_mol
      from_rdkit_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.rst
================================================
espaloma.graphs.utils
=====================

.. automodule:: espaloma.graphs.utils

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.graphs.utils.offmol_indices
      espaloma.graphs.utils.read_heterogeneous_graph
      espaloma.graphs.utils.read_homogeneous_graph

================================================
FILE: docs/autosummary/espaloma.mm.angle.angle_high.rst
================================================
espaloma.mm.angle.angle\_high
=============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: angle_high

================================================
FILE: docs/autosummary/espaloma.mm.angle.bond_angle.rst
================================================
espaloma.mm.angle.bond\_angle
=============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: bond_angle

================================================
FILE: docs/autosummary/espaloma.mm.angle.bond_bond.rst
================================================
espaloma.mm.angle.bond\_bond
============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: bond_bond

================================================
FILE: docs/autosummary/espaloma.mm.angle.harmonic_angle.rst
================================================
espaloma.mm.angle.harmonic\_angle
=================================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: harmonic_angle

================================================
FILE: docs/autosummary/espaloma.mm.angle.linear_mixture_angle.rst
================================================
espaloma.mm.angle.linear\_mixture\_angle
========================================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: linear_mixture_angle

================================================
FILE: docs/autosummary/espaloma.mm.angle.rst
================================================
espaloma.mm.angle
=================

.. automodule:: espaloma.mm.angle

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle_high
      bond_angle
      bond_bond
      harmonic_angle
      linear_mixture_angle
      urey_bradley
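
For orientation, a harmonic angle term has the familiar MM form
:math:`u(\theta) = \frac{k}{2}(\theta - \theta_0)^2`. The snippet below is a
generic PyTorch illustration of that functional form, not the exact signature
of ``harmonic_angle`` above:

.. code-block:: python

   import torch

   def harmonic_example(theta, k, eq):
       """Illustrative harmonic potential: 0.5 * k * (theta - eq) ** 2."""
       return 0.5 * k * (theta - eq) ** 2

   theta = torch.tensor(1.95)  # radians, near a tetrahedral angle
   energy = harmonic_example(theta, k=torch.tensor(100.0), eq=torch.tensor(1.91))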
================================================
FILE: docs/autosummary/espaloma.mm.angle.urey_bradley.rst
================================================
espaloma.mm.angle.urey\_bradley
===============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: urey_bradley

================================================
FILE: docs/autosummary/espaloma.mm.bond.bond_high.rst
================================================
espaloma.mm.bond.bond\_high
===========================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: bond_high

================================================
FILE: docs/autosummary/espaloma.mm.bond.gaussian_bond.rst
================================================
espaloma.mm.bond.gaussian\_bond
===============================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: gaussian_bond

================================================
FILE: docs/autosummary/espaloma.mm.bond.harmonic_bond.rst
================================================
espaloma.mm.bond.harmonic\_bond
===============================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: harmonic_bond

================================================
FILE: docs/autosummary/espaloma.mm.bond.linear_mixture_bond.rst
================================================
espaloma.mm.bond.linear\_mixture\_bond
======================================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: linear_mixture_bond

================================================
FILE: docs/autosummary/espaloma.mm.bond.rst
================================================
espaloma.mm.bond
================

.. automodule:: espaloma.mm.bond

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      bond_high
      gaussian_bond
      harmonic_bond
      linear_mixture_bond

================================================
FILE: docs/autosummary/espaloma.mm.energy.CarryII.rst
================================================
espaloma.mm.energy.CarryII
==========================

.. currentmodule:: espaloma.mm.energy

.. autoclass:: CarryII
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~CarryII.__init__
      ~CarryII.add_module
      ~CarryII.apply
      ~CarryII.bfloat16
      ~CarryII.buffers
      ~CarryII.children
      ~CarryII.cpu
      ~CarryII.cuda
      ~CarryII.double
      ~CarryII.eval
      ~CarryII.extra_repr
      ~CarryII.float
      ~CarryII.forward
      ~CarryII.half
      ~CarryII.load_state_dict
      ~CarryII.modules
      ~CarryII.named_buffers
      ~CarryII.named_children
      ~CarryII.named_modules
      ~CarryII.named_parameters
      ~CarryII.parameters
      ~CarryII.register_backward_hook
      ~CarryII.register_buffer
      ~CarryII.register_forward_hook
      ~CarryII.register_forward_pre_hook
      ~CarryII.register_parameter
      ~CarryII.requires_grad_
      ~CarryII.share_memory
      ~CarryII.state_dict
      ~CarryII.to
      ~CarryII.train
      ~CarryII.type
      ~CarryII.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~CarryII.T_destination
      ~CarryII.dump_patches

================================================
FILE: docs/autosummary/espaloma.mm.energy.EnergyInGraph.rst
================================================
espaloma.mm.energy.EnergyInGraph
================================

.. currentmodule:: espaloma.mm.energy

.. autoclass:: EnergyInGraph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~EnergyInGraph.__init__
      ~EnergyInGraph.add_module
      ~EnergyInGraph.apply
      ~EnergyInGraph.bfloat16
      ~EnergyInGraph.buffers
      ~EnergyInGraph.children
      ~EnergyInGraph.cpu
      ~EnergyInGraph.cuda
      ~EnergyInGraph.double
      ~EnergyInGraph.eval
      ~EnergyInGraph.extra_repr
      ~EnergyInGraph.float
      ~EnergyInGraph.forward
      ~EnergyInGraph.half
      ~EnergyInGraph.load_state_dict
      ~EnergyInGraph.modules
      ~EnergyInGraph.named_buffers
      ~EnergyInGraph.named_children
      ~EnergyInGraph.named_modules
      ~EnergyInGraph.named_parameters
      ~EnergyInGraph.parameters
      ~EnergyInGraph.register_backward_hook
      ~EnergyInGraph.register_buffer
      ~EnergyInGraph.register_forward_hook
      ~EnergyInGraph.register_forward_pre_hook
      ~EnergyInGraph.register_parameter
      ~EnergyInGraph.requires_grad_
      ~EnergyInGraph.share_memory
      ~EnergyInGraph.state_dict
      ~EnergyInGraph.to
      ~EnergyInGraph.train
      ~EnergyInGraph.type
      ~EnergyInGraph.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~EnergyInGraph.T_destination
      ~EnergyInGraph.dump_patches
================================================
FILE: docs/autosummary/espaloma.mm.energy.EnergyInGraphII.rst
================================================
espaloma.mm.energy.EnergyInGraphII
==================================

.. currentmodule:: espaloma.mm.energy

.. autoclass:: EnergyInGraphII
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~EnergyInGraphII.__init__
      ~EnergyInGraphII.add_module
      ~EnergyInGraphII.apply
      ~EnergyInGraphII.bfloat16
      ~EnergyInGraphII.buffers
      ~EnergyInGraphII.children
      ~EnergyInGraphII.cpu
      ~EnergyInGraphII.cuda
      ~EnergyInGraphII.double
      ~EnergyInGraphII.eval
      ~EnergyInGraphII.extra_repr
      ~EnergyInGraphII.float
      ~EnergyInGraphII.forward
      ~EnergyInGraphII.half
      ~EnergyInGraphII.load_state_dict
      ~EnergyInGraphII.modules
      ~EnergyInGraphII.named_buffers
      ~EnergyInGraphII.named_children
      ~EnergyInGraphII.named_modules
      ~EnergyInGraphII.named_parameters
      ~EnergyInGraphII.parameters
      ~EnergyInGraphII.register_backward_hook
      ~EnergyInGraphII.register_buffer
      ~EnergyInGraphII.register_forward_hook
      ~EnergyInGraphII.register_forward_pre_hook
      ~EnergyInGraphII.register_parameter
      ~EnergyInGraphII.requires_grad_
      ~EnergyInGraphII.share_memory
      ~EnergyInGraphII.state_dict
      ~EnergyInGraphII.to
      ~EnergyInGraphII.train
      ~EnergyInGraphII.type
      ~EnergyInGraphII.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~EnergyInGraphII.T_destination
      ~EnergyInGraphII.dump_patches

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_angle.rst
================================================
espaloma.mm.energy.apply\_angle
===============================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_angle

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_angle_ii.rst
================================================
espaloma.mm.energy.apply\_angle\_ii
===================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_angle_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_angle_linear_mixture.rst
================================================
espaloma.mm.energy.apply\_angle\_linear\_mixture
================================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_angle_linear_mixture
================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond.rst
================================================
espaloma.mm.energy.apply\_bond
==============================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond_gaussian.rst
================================================
espaloma.mm.energy.apply\_bond\_gaussian
========================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond_gaussian

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond_ii.rst
================================================
espaloma.mm.energy.apply\_bond\_ii
==================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond_linear_mixture.rst
================================================
espaloma.mm.energy.apply\_bond\_linear\_mixture
===============================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond_linear_mixture

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_improper_torsion.rst
================================================
espaloma.mm.energy.apply\_improper\_torsion
===========================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_improper_torsion

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_nonbonded.rst
================================================
espaloma.mm.energy.apply\_nonbonded
===================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_nonbonded

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_torsion.rst
================================================
espaloma.mm.energy.apply\_torsion
=================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_torsion

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_torsion_ii.rst
================================================
espaloma.mm.energy.apply\_torsion\_ii
=====================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_torsion_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.energy_in_graph.rst
================================================
espaloma.mm.energy.energy\_in\_graph
====================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: energy_in_graph

================================================
FILE: docs/autosummary/espaloma.mm.energy.energy_in_graph_ii.rst
================================================
espaloma.mm.energy.energy\_in\_graph\_ii
========================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: energy_in_graph_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.rst
================================================
espaloma.mm.energy
==================

.. automodule:: espaloma.mm.energy

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      apply_angle
      apply_angle_ii
      apply_angle_linear_mixture
      apply_bond
      apply_bond_gaussian
      apply_bond_ii
      apply_bond_linear_mixture
      apply_improper_torsion
      apply_nonbonded
      apply_torsion
      apply_torsion_ii
      energy_in_graph
      energy_in_graph_ii

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      CarryII
      EnergyInGraph
      EnergyInGraphII
================================================
FILE: docs/autosummary/espaloma.mm.functional.gaussian.rst
================================================
espaloma.mm.functional.gaussian
===============================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: gaussian

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic.rst
================================================
espaloma.mm.functional.harmonic
===============================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic_harmonic_coupled.rst
================================================
espaloma.mm.functional.harmonic\_harmonic\_coupled
==================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic_harmonic_coupled

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic_harmonic_periodic_coupled.rst
================================================
espaloma.mm.functional.harmonic\_harmonic\_periodic\_coupled
============================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic_harmonic_periodic_coupled

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic_periodic_coupled.rst
================================================
espaloma.mm.functional.harmonic\_periodic\_coupled
==================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic_periodic_coupled

================================================
FILE: docs/autosummary/espaloma.mm.functional.linear_mixture.rst
================================================
espaloma.mm.functional.linear\_mixture
======================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: linear_mixture

================================================
FILE: docs/autosummary/espaloma.mm.functional.linear_mixture_to_original.rst
================================================
espaloma.mm.functional.linear\_mixture\_to\_original
====================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: linear_mixture_to_original

================================================
FILE: docs/autosummary/espaloma.mm.functional.lj.rst
================================================
espaloma.mm.functional.lj
=========================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: lj

================================================
FILE: docs/autosummary/espaloma.mm.functional.periodic.rst
================================================
espaloma.mm.functional.periodic
===============================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: periodic
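
For orientation, the periodic torsion form used in MM force fields is
:math:`u(\phi) = \sum_n k_n \left(1 + \cos(n\phi - \phi_n)\right)`. Below is a
generic PyTorch illustration of that sum, not the exact signature of
``periodic`` above:

.. code-block:: python

   import torch

   def periodic_example(phi, ks, periodicities, phases):
       """Illustrative periodic torsion: sum_n k_n * (1 + cos(n*phi - phase_n))."""
       return (ks * (1.0 + torch.cos(periodicities * phi - phases))).sum(-1)

   phi = torch.tensor(0.5)  # dihedral angle in radians
   energy = periodic_example(
       phi,
       ks=torch.tensor([1.0, 0.5]),
       periodicities=torch.tensor([1.0, 2.0]),
       phases=torch.tensor([0.0, 3.14159]),
   )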
================================================
FILE: docs/autosummary/espaloma.mm.functional.periodic_fixed_phases.rst
================================================
espaloma.mm.functional.periodic\_fixed\_phases
==============================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: periodic_fixed_phases

================================================
FILE: docs/autosummary/espaloma.mm.functional.rst
================================================
espaloma.mm.functional
======================

.. automodule:: espaloma.mm.functional

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      gaussian
      harmonic
      harmonic_harmonic_coupled
      harmonic_harmonic_periodic_coupled
      harmonic_periodic_coupled
      linear_mixture
      linear_mixture_to_original
      lj
      periodic
      periodic_fixed_phases

================================================
FILE: docs/autosummary/espaloma.mm.geometry.GeometryInGraph.rst
================================================
espaloma.mm.geometry.GeometryInGraph
====================================

.. currentmodule:: espaloma.mm.geometry

.. autoclass:: GeometryInGraph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~GeometryInGraph.__init__
      ~GeometryInGraph.add_module
      ~GeometryInGraph.apply
      ~GeometryInGraph.bfloat16
      ~GeometryInGraph.buffers
      ~GeometryInGraph.children
      ~GeometryInGraph.cpu
      ~GeometryInGraph.cuda
      ~GeometryInGraph.double
      ~GeometryInGraph.eval
      ~GeometryInGraph.extra_repr
      ~GeometryInGraph.float
      ~GeometryInGraph.forward
      ~GeometryInGraph.half
      ~GeometryInGraph.load_state_dict
      ~GeometryInGraph.modules
      ~GeometryInGraph.named_buffers
      ~GeometryInGraph.named_children
      ~GeometryInGraph.named_modules
      ~GeometryInGraph.named_parameters
      ~GeometryInGraph.parameters
      ~GeometryInGraph.register_backward_hook
      ~GeometryInGraph.register_buffer
      ~GeometryInGraph.register_forward_hook
      ~GeometryInGraph.register_forward_pre_hook
      ~GeometryInGraph.register_parameter
      ~GeometryInGraph.requires_grad_
      ~GeometryInGraph.share_memory
      ~GeometryInGraph.state_dict
      ~GeometryInGraph.to
      ~GeometryInGraph.train
      ~GeometryInGraph.type
      ~GeometryInGraph.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~GeometryInGraph.T_destination
      ~GeometryInGraph.dump_patches

================================================
FILE: docs/autosummary/espaloma.mm.geometry.angle.rst
================================================
espaloma.mm.geometry.angle
==========================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: angle

================================================
FILE: docs/autosummary/espaloma.mm.geometry.apply_angle.rst
================================================
espaloma.mm.geometry.apply\_angle
=================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: apply_angle

================================================
FILE: docs/autosummary/espaloma.mm.geometry.apply_bond.rst
================================================
espaloma.mm.geometry.apply\_bond
================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: apply_bond

================================================
FILE: docs/autosummary/espaloma.mm.geometry.apply_torsion.rst
================================================
espaloma.mm.geometry.apply\_torsion
===================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: apply_torsion
================================================
FILE: docs/autosummary/espaloma.mm.geometry.copy_src.rst
================================================
espaloma.mm.geometry.copy\_src
==============================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: copy_src

================================================
FILE: docs/autosummary/espaloma.mm.geometry.dihedral.rst
================================================
espaloma.mm.geometry.dihedral
=============================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: dihedral

================================================
FILE: docs/autosummary/espaloma.mm.geometry.distance.rst
================================================
espaloma.mm.geometry.distance
=============================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: distance

================================================
FILE: docs/autosummary/espaloma.mm.geometry.geometry_in_graph.rst
================================================
espaloma.mm.geometry.geometry\_in\_graph
========================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: geometry_in_graph

================================================
FILE: docs/autosummary/espaloma.mm.geometry.reduce_stack.rst
================================================
espaloma.mm.geometry.reduce\_stack
==================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: reduce_stack

================================================
FILE: docs/autosummary/espaloma.mm.geometry.rst
================================================
espaloma.mm.geometry
====================

.. automodule:: espaloma.mm.geometry

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle
      apply_angle
      apply_bond
      apply_torsion
      copy_src
      dihedral
      distance
      geometry_in_graph
      reduce_stack

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      GeometryInGraph

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.arithmetic_mean.rst
================================================
espaloma.mm.nonbonded.arithmetic\_mean
======================================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: arithmetic_mean

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.geometric_mean.rst
================================================
espaloma.mm.nonbonded.geometric\_mean
=====================================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: geometric_mean

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.lj_12_6.rst
================================================
espaloma.mm.nonbonded.lj\_12\_6
===============================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: lj_12_6

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.lj_9_6.rst
================================================
espaloma.mm.nonbonded.lj\_9\_6
==============================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: lj_9_6

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.lorentz_berthelot.rst
================================================
espaloma.mm.nonbonded.lorentz\_berthelot
========================================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: lorentz_berthelot
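
For orientation, Lorentz-Berthelot combining rules build pairwise
Lennard-Jones parameters from per-atom ones:
:math:`\sigma_{ij} = (\sigma_i + \sigma_j)/2` (arithmetic mean) and
:math:`\epsilon_{ij} = \sqrt{\epsilon_i \epsilon_j}` (geometric mean). A
generic PyTorch illustration of those rules, not the exact signature of
``lorentz_berthelot`` above:

.. code-block:: python

   import torch

   def lorentz_berthelot_example(sigma_i, sigma_j, epsilon_i, epsilon_j):
       """Illustrative combining rules for pairwise Lennard-Jones parameters."""
       sigma_ij = 0.5 * (sigma_i + sigma_j)            # arithmetic mean
       epsilon_ij = torch.sqrt(epsilon_i * epsilon_j)  # geometric mean
       return sigma_ij, epsilon_ij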
================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.rst
================================================
espaloma.mm.nonbonded
=====================

.. automodule:: espaloma.mm.nonbonded

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      arithmetic_mean
      geometric_mean
      lj_12_6
      lj_9_6
      lorentz_berthelot

================================================
FILE: docs/autosummary/espaloma.mm.rst
================================================
espaloma.mm
===========

.. automodule:: espaloma.mm

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.mm.angle
      espaloma.mm.bond
      espaloma.mm.energy
      espaloma.mm.functional
      espaloma.mm.geometry
      espaloma.mm.nonbonded
      espaloma.mm.torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.angle_angle.rst
================================================
espaloma.mm.torsion.angle\_angle
================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: angle_angle

================================================
FILE: docs/autosummary/espaloma.mm.torsion.angle_angle_torsion.rst
================================================
espaloma.mm.torsion.angle\_angle\_torsion
=========================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: angle_angle_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.angle_torsion.rst
================================================
espaloma.mm.torsion.angle\_torsion
==================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: angle_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.bond_torsion.rst
================================================
espaloma.mm.torsion.bond\_torsion
=================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: bond_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.periodic_torsion.rst
================================================
espaloma.mm.torsion.periodic\_torsion
=====================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: periodic_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.rst
================================================
espaloma.mm.torsion
===================

.. automodule:: espaloma.mm.torsion

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle_angle
      angle_angle_torsion
      angle_torsion
      bond_torsion
      periodic_torsion

================================================
FILE: docs/autosummary/espaloma.nn.baselines.FreeParameterBaseline.rst
================================================
espaloma.nn.baselines.FreeParameterBaseline
===========================================

.. currentmodule:: espaloma.nn.baselines

.. autoclass:: FreeParameterBaseline
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~FreeParameterBaseline.__init__
      ~FreeParameterBaseline.add_module
      ~FreeParameterBaseline.apply
      ~FreeParameterBaseline.bfloat16
      ~FreeParameterBaseline.buffers
      ~FreeParameterBaseline.children
      ~FreeParameterBaseline.cpu
      ~FreeParameterBaseline.cuda
      ~FreeParameterBaseline.double
      ~FreeParameterBaseline.eval
      ~FreeParameterBaseline.extra_repr
      ~FreeParameterBaseline.float
      ~FreeParameterBaseline.forward
      ~FreeParameterBaseline.half
      ~FreeParameterBaseline.load_state_dict
      ~FreeParameterBaseline.modules
      ~FreeParameterBaseline.named_buffers
      ~FreeParameterBaseline.named_children
      ~FreeParameterBaseline.named_modules
      ~FreeParameterBaseline.named_parameters
      ~FreeParameterBaseline.parameters
      ~FreeParameterBaseline.register_backward_hook
      ~FreeParameterBaseline.register_buffer
      ~FreeParameterBaseline.register_forward_hook
      ~FreeParameterBaseline.register_forward_pre_hook
      ~FreeParameterBaseline.register_parameter
      ~FreeParameterBaseline.requires_grad_
      ~FreeParameterBaseline.share_memory
      ~FreeParameterBaseline.state_dict
      ~FreeParameterBaseline.to
      ~FreeParameterBaseline.train
      ~FreeParameterBaseline.type
      ~FreeParameterBaseline.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~FreeParameterBaseline.T_destination
      ~FreeParameterBaseline.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.baselines.FreeParameterBaselineInitMean.rst
================================================
espaloma.nn.baselines.FreeParameterBaselineInitMean
===================================================

.. currentmodule:: espaloma.nn.baselines

.. autoclass:: FreeParameterBaselineInitMean
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~FreeParameterBaselineInitMean.__init__
      ~FreeParameterBaselineInitMean.add_module
      ~FreeParameterBaselineInitMean.apply
      ~FreeParameterBaselineInitMean.bfloat16
      ~FreeParameterBaselineInitMean.buffers
      ~FreeParameterBaselineInitMean.children
      ~FreeParameterBaselineInitMean.cpu
      ~FreeParameterBaselineInitMean.cuda
      ~FreeParameterBaselineInitMean.double
      ~FreeParameterBaselineInitMean.eval
      ~FreeParameterBaselineInitMean.extra_repr
      ~FreeParameterBaselineInitMean.float
      ~FreeParameterBaselineInitMean.forward
      ~FreeParameterBaselineInitMean.half
      ~FreeParameterBaselineInitMean.load_state_dict
      ~FreeParameterBaselineInitMean.modules
      ~FreeParameterBaselineInitMean.named_buffers
      ~FreeParameterBaselineInitMean.named_children
      ~FreeParameterBaselineInitMean.named_modules
      ~FreeParameterBaselineInitMean.named_parameters
      ~FreeParameterBaselineInitMean.parameters
      ~FreeParameterBaselineInitMean.register_backward_hook
      ~FreeParameterBaselineInitMean.register_buffer
      ~FreeParameterBaselineInitMean.register_forward_hook
      ~FreeParameterBaselineInitMean.register_forward_pre_hook
      ~FreeParameterBaselineInitMean.register_parameter
      ~FreeParameterBaselineInitMean.requires_grad_
      ~FreeParameterBaselineInitMean.share_memory
      ~FreeParameterBaselineInitMean.state_dict
      ~FreeParameterBaselineInitMean.to
      ~FreeParameterBaselineInitMean.train
      ~FreeParameterBaselineInitMean.type
      ~FreeParameterBaselineInitMean.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~FreeParameterBaselineInitMean.T_destination
      ~FreeParameterBaselineInitMean.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.baselines.rst
================================================
espaloma.nn.baselines
=====================

.. automodule:: espaloma.nn.baselines

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      FreeParameterBaseline
      FreeParameterBaselineInitMean
================================================
FILE: docs/autosummary/espaloma.nn.layers.dgl_legacy.GN.rst
================================================
espaloma.nn.layers.dgl\_legacy.gn
=================================

.. currentmodule:: espaloma.nn.layers.dgl_legacy

.. autofunction:: gn

================================================
FILE: docs/autosummary/espaloma.nn.layers.dgl_legacy.rst
================================================
espaloma.nn.layers.dgl\_legacy
==============================

.. automodule:: espaloma.nn.layers.dgl_legacy

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      gn

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      GN

================================================
FILE: docs/autosummary/espaloma.nn.layers.rst
================================================
espaloma.nn.layers
==================

.. automodule:: espaloma.nn.layers

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.nn.layers.dgl_legacy

================================================
FILE: docs/autosummary/espaloma.nn.readout.base_readout.BaseReadout.rst
================================================
espaloma.nn.readout.base\_readout.BaseReadout
=============================================

.. currentmodule:: espaloma.nn.readout.base_readout

.. autoclass:: BaseReadout
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~BaseReadout.__init__
      ~BaseReadout.add_module
      ~BaseReadout.apply
      ~BaseReadout.bfloat16
      ~BaseReadout.buffers
      ~BaseReadout.children
      ~BaseReadout.cpu
      ~BaseReadout.cuda
      ~BaseReadout.double
      ~BaseReadout.eval
      ~BaseReadout.extra_repr
      ~BaseReadout.float
      ~BaseReadout.forward
      ~BaseReadout.half
      ~BaseReadout.load_state_dict
      ~BaseReadout.modules
      ~BaseReadout.named_buffers
      ~BaseReadout.named_children
      ~BaseReadout.named_modules
      ~BaseReadout.named_parameters
      ~BaseReadout.parameters
      ~BaseReadout.register_backward_hook
      ~BaseReadout.register_buffer
      ~BaseReadout.register_forward_hook
      ~BaseReadout.register_forward_pre_hook
      ~BaseReadout.register_parameter
      ~BaseReadout.requires_grad_
      ~BaseReadout.share_memory
      ~BaseReadout.state_dict
      ~BaseReadout.to
      ~BaseReadout.train
      ~BaseReadout.type
      ~BaseReadout.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~BaseReadout.T_destination
      ~BaseReadout.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.readout.base_readout.rst
================================================
espaloma.nn.readout.base\_readout
=================================

.. automodule:: espaloma.nn.readout.base_readout

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      BaseReadout

================================================
FILE: docs/autosummary/espaloma.nn.readout.charge_equilibrium.ChargeEquilibrium.rst
================================================
espaloma.nn.readout.charge\_equilibrium.ChargeEquilibrium
=========================================================

.. currentmodule:: espaloma.nn.readout.charge_equilibrium

.. autoclass:: ChargeEquilibrium
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~ChargeEquilibrium.__init__
      ~ChargeEquilibrium.add_module
      ~ChargeEquilibrium.apply
      ~ChargeEquilibrium.bfloat16
      ~ChargeEquilibrium.buffers
      ~ChargeEquilibrium.children
      ~ChargeEquilibrium.cpu
      ~ChargeEquilibrium.cuda
      ~ChargeEquilibrium.double
      ~ChargeEquilibrium.eval
      ~ChargeEquilibrium.extra_repr
      ~ChargeEquilibrium.float
      ~ChargeEquilibrium.forward
      ~ChargeEquilibrium.half
      ~ChargeEquilibrium.load_state_dict
      ~ChargeEquilibrium.modules
      ~ChargeEquilibrium.named_buffers
      ~ChargeEquilibrium.named_children
      ~ChargeEquilibrium.named_modules
      ~ChargeEquilibrium.named_parameters
      ~ChargeEquilibrium.parameters
      ~ChargeEquilibrium.register_backward_hook
      ~ChargeEquilibrium.register_buffer
      ~ChargeEquilibrium.register_forward_hook
      ~ChargeEquilibrium.register_forward_pre_hook
      ~ChargeEquilibrium.register_parameter
      ~ChargeEquilibrium.requires_grad_
      ~ChargeEquilibrium.share_memory
      ~ChargeEquilibrium.state_dict
      ~ChargeEquilibrium.to
      ~ChargeEquilibrium.train
      ~ChargeEquilibrium.type
      ~ChargeEquilibrium.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~ChargeEquilibrium.T_destination
      ~ChargeEquilibrium.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.readout.charge_equilibrium.get_charges.rst
================================================
espaloma.nn.readout.charge\_equilibrium.get\_charges
====================================================

.. currentmodule:: espaloma.nn.readout.charge_equilibrium

.. autofunction:: get_charges

================================================
FILE: docs/autosummary/espaloma.nn.readout.charge_equilibrium.rst
================================================
espaloma.nn.readout.charge\_equilibrium
=======================================

.. automodule:: espaloma.nn.readout.charge_equilibrium

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      get_charges

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      ChargeEquilibrium

================================================
FILE: docs/autosummary/espaloma.nn.readout.graph_level_readout.GraphLevelReadout.rst
================================================
espaloma.nn.readout.graph\_level\_readout.GraphLevelReadout
===========================================================

.. currentmodule:: espaloma.nn.readout.graph_level_readout

.. autoclass:: GraphLevelReadout
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~GraphLevelReadout.__init__
      ~GraphLevelReadout.add_module
      ~GraphLevelReadout.apply
      ~GraphLevelReadout.bfloat16
      ~GraphLevelReadout.buffers
      ~GraphLevelReadout.children
      ~GraphLevelReadout.cpu
      ~GraphLevelReadout.cuda
      ~GraphLevelReadout.double
      ~GraphLevelReadout.eval
      ~GraphLevelReadout.extra_repr
      ~GraphLevelReadout.float
      ~GraphLevelReadout.forward
      ~GraphLevelReadout.half
      ~GraphLevelReadout.load_state_dict
      ~GraphLevelReadout.modules
      ~GraphLevelReadout.named_buffers
      ~GraphLevelReadout.named_children
      ~GraphLevelReadout.named_modules
      ~GraphLevelReadout.named_parameters
      ~GraphLevelReadout.parameters
      ~GraphLevelReadout.register_backward_hook
      ~GraphLevelReadout.register_buffer
      ~GraphLevelReadout.register_forward_hook
      ~GraphLevelReadout.register_forward_pre_hook
      ~GraphLevelReadout.register_parameter
      ~GraphLevelReadout.requires_grad_
      ~GraphLevelReadout.share_memory
      ~GraphLevelReadout.state_dict
      ~GraphLevelReadout.to
      ~GraphLevelReadout.train
      ~GraphLevelReadout.type
      ~GraphLevelReadout.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~GraphLevelReadout.T_destination
      ~GraphLevelReadout.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.readout.graph_level_readout.rst
================================================
espaloma.nn.readout.graph\_level\_readout
=========================================

.. automodule:: espaloma.nn.readout.graph_level_readout

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      GraphLevelReadout

================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.ExpCoefficients.rst
================================================
espaloma.nn.readout.janossy.ExpCoefficients
===========================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: ExpCoefficients
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~ExpCoefficients.__init__
      ~ExpCoefficients.add_module
      ~ExpCoefficients.apply
      ~ExpCoefficients.bfloat16
      ~ExpCoefficients.buffers
      ~ExpCoefficients.children
      ~ExpCoefficients.cpu
      ~ExpCoefficients.cuda
      ~ExpCoefficients.double
      ~ExpCoefficients.eval
      ~ExpCoefficients.extra_repr
      ~ExpCoefficients.float
      ~ExpCoefficients.forward
      ~ExpCoefficients.half
      ~ExpCoefficients.load_state_dict
      ~ExpCoefficients.modules
      ~ExpCoefficients.named_buffers
      ~ExpCoefficients.named_children
      ~ExpCoefficients.named_modules
      ~ExpCoefficients.named_parameters
      ~ExpCoefficients.parameters
      ~ExpCoefficients.register_backward_hook
      ~ExpCoefficients.register_buffer
      ~ExpCoefficients.register_forward_hook
      ~ExpCoefficients.register_forward_pre_hook
      ~ExpCoefficients.register_parameter
      ~ExpCoefficients.requires_grad_
      ~ExpCoefficients.share_memory
      ~ExpCoefficients.state_dict
      ~ExpCoefficients.to
      ~ExpCoefficients.train
      ~ExpCoefficients.type
      ~ExpCoefficients.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~ExpCoefficients.T_destination
      ~ExpCoefficients.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.JanossyPooling.rst
================================================
espaloma.nn.readout.janossy.JanossyPooling
==========================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: JanossyPooling
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~JanossyPooling.__init__
      ~JanossyPooling.add_module
      ~JanossyPooling.apply
      ~JanossyPooling.bfloat16
      ~JanossyPooling.buffers
      ~JanossyPooling.children
      ~JanossyPooling.cpu
      ~JanossyPooling.cuda
      ~JanossyPooling.double
      ~JanossyPooling.eval
      ~JanossyPooling.extra_repr
      ~JanossyPooling.float
      ~JanossyPooling.forward
      ~JanossyPooling.half
      ~JanossyPooling.load_state_dict
      ~JanossyPooling.modules
      ~JanossyPooling.named_buffers
      ~JanossyPooling.named_children
      ~JanossyPooling.named_modules
      ~JanossyPooling.named_parameters
      ~JanossyPooling.parameters
      ~JanossyPooling.register_backward_hook
      ~JanossyPooling.register_buffer
      ~JanossyPooling.register_forward_hook
      ~JanossyPooling.register_forward_pre_hook
      ~JanossyPooling.register_parameter
      ~JanossyPooling.requires_grad_
      ~JanossyPooling.share_memory
      ~JanossyPooling.state_dict
      ~JanossyPooling.to
      ~JanossyPooling.train
      ~JanossyPooling.type
      ~JanossyPooling.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~JanossyPooling.T_destination
      ~JanossyPooling.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.JanossyPoolingImproper.rst
================================================
espaloma.nn.readout.janossy.JanossyPoolingImproper
==================================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: JanossyPoolingImproper
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~JanossyPoolingImproper.__init__
      ~JanossyPoolingImproper.add_module
      ~JanossyPoolingImproper.apply
      ~JanossyPoolingImproper.bfloat16
      ~JanossyPoolingImproper.buffers
      ~JanossyPoolingImproper.children
      ~JanossyPoolingImproper.cpu
      ~JanossyPoolingImproper.cuda
      ~JanossyPoolingImproper.double
      ~JanossyPoolingImproper.eval
      ~JanossyPoolingImproper.extra_repr
      ~JanossyPoolingImproper.float
      ~JanossyPoolingImproper.forward
      ~JanossyPoolingImproper.half
      ~JanossyPoolingImproper.load_state_dict
      ~JanossyPoolingImproper.modules
      ~JanossyPoolingImproper.named_buffers
      ~JanossyPoolingImproper.named_children
      ~JanossyPoolingImproper.named_modules
      ~JanossyPoolingImproper.named_parameters
      ~JanossyPoolingImproper.parameters
      ~JanossyPoolingImproper.register_backward_hook
      ~JanossyPoolingImproper.register_buffer
      ~JanossyPoolingImproper.register_forward_hook
      ~JanossyPoolingImproper.register_forward_pre_hook
      ~JanossyPoolingImproper.register_parameter
      ~JanossyPoolingImproper.requires_grad_
      ~JanossyPoolingImproper.share_memory
      ~JanossyPoolingImproper.state_dict
      ~JanossyPoolingImproper.to
      ~JanossyPoolingImproper.train
      ~JanossyPoolingImproper.type
      ~JanossyPoolingImproper.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~JanossyPoolingImproper.T_destination
      ~JanossyPoolingImproper.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.JanossyPoolingNonbonded.rst
================================================
espaloma.nn.readout.janossy.JanossyPoolingNonbonded
===================================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: JanossyPoolingNonbonded
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~JanossyPoolingNonbonded.__init__
      ~JanossyPoolingNonbonded.add_module
      ~JanossyPoolingNonbonded.apply
      ~JanossyPoolingNonbonded.bfloat16
      ~JanossyPoolingNonbonded.buffers
      ~JanossyPoolingNonbonded.children
      ~JanossyPoolingNonbonded.cpu
      ~JanossyPoolingNonbonded.cuda
      ~JanossyPoolingNonbonded.double
      ~JanossyPoolingNonbonded.eval
      ~JanossyPoolingNonbonded.extra_repr
      ~JanossyPoolingNonbonded.float
      ~JanossyPoolingNonbonded.forward
      ~JanossyPoolingNonbonded.half
      ~JanossyPoolingNonbonded.load_state_dict
      ~JanossyPoolingNonbonded.modules
      ~JanossyPoolingNonbonded.named_buffers
      ~JanossyPoolingNonbonded.named_children
      ~JanossyPoolingNonbonded.named_modules
      ~JanossyPoolingNonbonded.named_parameters
      ~JanossyPoolingNonbonded.parameters
      ~JanossyPoolingNonbonded.register_backward_hook
      ~JanossyPoolingNonbonded.register_buffer
      ~JanossyPoolingNonbonded.register_forward_hook
      ~JanossyPoolingNonbonded.register_forward_pre_hook
      ~JanossyPoolingNonbonded.register_parameter
      ~JanossyPoolingNonbonded.requires_grad_
      ~JanossyPoolingNonbonded.share_memory
      ~JanossyPoolingNonbonded.state_dict
      ~JanossyPoolingNonbonded.to
      ~JanossyPoolingNonbonded.train
      ~JanossyPoolingNonbonded.type
      ~JanossyPoolingNonbonded.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~JanossyPoolingNonbonded.T_destination
      ~JanossyPoolingNonbonded.dump_patches
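
A sketch of how these Janossy-pooling readouts are typically composed with a
graph convolution stage to form a full espaloma model, following the pattern
in the project README; the layer name, hyperparameters, and ``out_features``
keys below are illustrative and should be checked against the class
signatures rendered above:

.. code-block:: python

   import torch
   import espaloma as esp

   # stage I: molecular graph -> per-atom latent representation (DGL GNN layers)
   representation = esp.nn.Sequential(
       layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),
       config=[128, "relu", 128, "relu", 128, "relu"],
   )

   # stage II/III: latent representation -> symmetry-pooled MM parameters
   readout = esp.nn.readout.janossy.JanossyPooling(
       in_features=128,
       config=[128, "relu", 128, "relu", 128, "relu"],
       out_features={
           2: {"log_coefficients": 2},  # bond terms
           3: {"log_coefficients": 2},  # angle terms
           4: {"k": 6},                 # proper torsion barrier heights
       },
   )

   espaloma_model = torch.nn.Sequential(representation, readout)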
autosummary:: ~JanossyPoolingNonbonded.T_destination ~JanossyPoolingNonbonded.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.readout.janossy.LinearMixtureToOriginal.rst ================================================ espaloma.nn.readout.janossy.LinearMixtureToOriginal =================================================== .. currentmodule:: espaloma.nn.readout.janossy .. autoclass:: LinearMixtureToOriginal :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~LinearMixtureToOriginal.__init__ ~LinearMixtureToOriginal.add_module ~LinearMixtureToOriginal.apply ~LinearMixtureToOriginal.bfloat16 ~LinearMixtureToOriginal.buffers ~LinearMixtureToOriginal.children ~LinearMixtureToOriginal.cpu ~LinearMixtureToOriginal.cuda ~LinearMixtureToOriginal.double ~LinearMixtureToOriginal.eval ~LinearMixtureToOriginal.extra_repr ~LinearMixtureToOriginal.float ~LinearMixtureToOriginal.forward ~LinearMixtureToOriginal.half ~LinearMixtureToOriginal.load_state_dict ~LinearMixtureToOriginal.modules ~LinearMixtureToOriginal.named_buffers ~LinearMixtureToOriginal.named_children ~LinearMixtureToOriginal.named_modules ~LinearMixtureToOriginal.named_parameters ~LinearMixtureToOriginal.parameters ~LinearMixtureToOriginal.register_backward_hook ~LinearMixtureToOriginal.register_buffer ~LinearMixtureToOriginal.register_forward_hook ~LinearMixtureToOriginal.register_forward_pre_hook ~LinearMixtureToOriginal.register_parameter ~LinearMixtureToOriginal.requires_grad_ ~LinearMixtureToOriginal.share_memory ~LinearMixtureToOriginal.state_dict ~LinearMixtureToOriginal.to ~LinearMixtureToOriginal.train ~LinearMixtureToOriginal.type ~LinearMixtureToOriginal.zero_grad .. rubric:: Attributes .. autosummary:: ~LinearMixtureToOriginal.T_destination ~LinearMixtureToOriginal.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.readout.janossy.rst ================================================ espaloma.nn.readout.janossy =========================== .. automodule:: espaloma.nn.readout.janossy .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst ExpCoefficients JanossyPooling JanossyPoolingImproper JanossyPoolingNonbonded LinearMixtureToOriginal ================================================ FILE: docs/autosummary/espaloma.nn.readout.node_typing.NodeTyping.rst ================================================ espaloma.nn.readout.node\_typing.NodeTyping =========================================== .. currentmodule:: espaloma.nn.readout.node_typing .. autoclass:: NodeTyping :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~NodeTyping.__init__ ~NodeTyping.add_module ~NodeTyping.apply ~NodeTyping.bfloat16 ~NodeTyping.buffers ~NodeTyping.children ~NodeTyping.cpu ~NodeTyping.cuda ~NodeTyping.double ~NodeTyping.eval ~NodeTyping.extra_repr ~NodeTyping.float ~NodeTyping.forward ~NodeTyping.half ~NodeTyping.load_state_dict ~NodeTyping.modules ~NodeTyping.named_buffers ~NodeTyping.named_children ~NodeTyping.named_modules ~NodeTyping.named_parameters ~NodeTyping.parameters ~NodeTyping.register_backward_hook ~NodeTyping.register_buffer ~NodeTyping.register_forward_hook ~NodeTyping.register_forward_pre_hook ~NodeTyping.register_parameter ~NodeTyping.requires_grad_ ~NodeTyping.share_memory ~NodeTyping.state_dict ~NodeTyping.to ~NodeTyping.train ~NodeTyping.type ~NodeTyping.zero_grad .. rubric:: Attributes .. 
autosummary:: ~NodeTyping.T_destination ~NodeTyping.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.readout.node_typing.rst ================================================ espaloma.nn.readout.node\_typing ================================ .. automodule:: espaloma.nn.readout.node_typing .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst NodeTyping ================================================ FILE: docs/autosummary/espaloma.nn.readout.rst ================================================ espaloma.nn.readout =================== .. automodule:: espaloma.nn.readout .. rubric:: Modules .. autosummary:: :toctree: :template: custom-module-template.rst :recursive: espaloma.nn.readout.base_readout espaloma.nn.readout.charge_equilibrium espaloma.nn.readout.graph_level_readout espaloma.nn.readout.janossy espaloma.nn.readout.node_typing ================================================ FILE: docs/autosummary/espaloma.nn.rst ================================================ espaloma.nn =========== .. automodule:: espaloma.nn .. rubric:: Modules .. autosummary:: :toctree: :template: custom-module-template.rst :recursive: espaloma.nn.baselines espaloma.nn.layers espaloma.nn.readout espaloma.nn.sequential ================================================ FILE: docs/autosummary/espaloma.nn.sequential.Sequential.rst ================================================ espaloma.nn.sequential.Sequential ================================= .. currentmodule:: espaloma.nn.sequential .. autoclass:: Sequential :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~Sequential.__init__ ~Sequential.add_module ~Sequential.apply ~Sequential.bfloat16 ~Sequential.buffers ~Sequential.children ~Sequential.cpu ~Sequential.cuda ~Sequential.double ~Sequential.eval ~Sequential.extra_repr ~Sequential.float ~Sequential.forward ~Sequential.half ~Sequential.load_state_dict ~Sequential.modules ~Sequential.named_buffers ~Sequential.named_children ~Sequential.named_modules ~Sequential.named_parameters ~Sequential.parameters ~Sequential.register_backward_hook ~Sequential.register_buffer ~Sequential.register_forward_hook ~Sequential.register_forward_pre_hook ~Sequential.register_parameter ~Sequential.requires_grad_ ~Sequential.share_memory ~Sequential.state_dict ~Sequential.to ~Sequential.train ~Sequential.type ~Sequential.zero_grad .. rubric:: Attributes .. autosummary:: ~Sequential.T_destination ~Sequential.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.sequential.rst ================================================ espaloma.nn.sequential ====================== .. automodule:: espaloma.nn.sequential .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst Sequential ================================================ FILE: docs/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/stable/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.

# In case the project was not installed
import os
import sys
import subprocess
sys.path.insert(0, os.path.abspath('..'))

# -- Project information -----------------------------------------------------

project = 'espaloma'
copyright = ("2020, Yuanqing Wang @ choderalab // MSKCC.")
author = 'Yuanqing Wang'
github_url = "https://github.com/choderalab/espaloma"

# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = ''

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autosummary',
    'sphinx.ext.autodoc',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'sphinx.ext.intersphinx',
    'sphinx.ext.extlinks',
    'sphinx.ext.coverage',
    # 'numpydoc',
]

autosummary_generate = True
napoleon_google_docstring = False
napoleon_use_param = False
napoleon_use_ivar = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'default'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by the theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'espalomadoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'espaloma.tex', 'espaloma Documentation',
     'espaloma', 'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'espaloma', 'espaloma Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'espaloma', 'espaloma Documentation',
     author, 'espaloma',
     'Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm',
     'Miscellaneous'),
]

# -- Extension configuration -------------------------------------------------

================================================
FILE: docs/deploy.rst
================================================
Deploy espaloma 0.3.2 force field to parametrize your MM system
===============================================================

A pretrained espaloma force field can be deployed on arbitrary small molecule
systems in a few lines::

    # imports
    import os
    import torch
    import espaloma as esp

    # define or load a molecule of interest via the Open Force Field toolkit
    from openff.toolkit.topology import Molecule
    molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")

    # create an Espaloma Graph object to represent the molecule of interest
    molecule_graph = esp.Graph(molecule)

    # load pretrained model
    espaloma_model = esp.get_model("latest")

    # apply a trained espaloma model to assign parameters
    espaloma_model(molecule_graph.heterograph)

    # create an OpenMM System for the specified molecule
    openmm_system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph)

If you are using espaloma from a local ``.pt`` file, for example
``espaloma-0.3.2.pt``, you need to call the model's ``eval`` method to obtain
correct inference results::

    # load local pretrained model
    espaloma_model = torch.load("espaloma-0.3.2.pt")
    espaloma_model.eval()

The rest of the code should be the same as in the previous example.
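For a quick sanity check of the resulting ``System``, a single-point energy
can be evaluated with OpenMM. The snippet below is an illustrative sketch, not
part of espaloma itself; it assumes a recent OpenMM (``import openmm``) and an
openff-toolkit version whose conformers expose ``to_openmm()``::

    # generate one conformer with the OpenFF toolkit and evaluate its energy
    molecule.generate_conformers(n_conformers=1)

    import openmm
    from openmm import unit

    integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
    context = openmm.Context(openmm_system, integrator)
    # on older openff-toolkit versions the conformer is already an OpenMM
    # Quantity, in which case the to_openmm() call should be dropped
    context.setPositions(molecule.conformers[0].to_openmm())
    print(context.getState(getEnergy=True).getPotentialEnergy())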
================================================
FILE: docs/download_experiments.sh
================================================
export fileid=1qdHEypk3uMhZEYCStWTU8u1uIDHzH3Qy
wget -O typing.ipynb 'https://docs.google.com/uc?export=download&id='$fileid
jupyter nbconvert typing.ipynb --to rst --TagRemovePreprocessor.remove_all_outputs_tags='{"remove_output"}'
mv typing.rst experiments/typing.rst

export fileid=1krhwGHKoqL5-_P0G89fDB7Iw3ENHW2G_
wget -O mm_fitting_small.ipynb 'https://docs.google.com/uc?export=download&id='$fileid
jupyter nbconvert mm_fitting_small.ipynb --to rst --TagRemovePreprocessor.remove_all_outputs_tags='{"remove_output"}'
mv mm_fitting_small.rst experiments/mm_fitting_small.rst
mv mm_fitting_small_files experiments/mm_fitting_small_files

export fileid=1i_z0b0-m_91bMww1hY5Kdc76VHmtHsWD
wget -O qm_fitting.ipynb 'https://docs.google.com/uc?export=download&id='$fileid
jupyter nbconvert qm_fitting.ipynb --to rst --TagRemovePreprocessor.remove_all_outputs_tags='{"remove_output"}'
cp qm_fitting.rst experiments/qm_fitting.rst

rm *.ipynb

================================================
FILE: docs/experiments/index.rst
================================================
To reproduce the experiments in the paper https://arxiv.org/abs/2010.01196

.. toctree::
   :maxdepth: 2
   :caption: Contents:

   typing
   mm_fitting_small
   qm_fitting

================================================
FILE: docs/experiments/mm_fitting_small.rst
================================================
Toy experiment: Molecular mechanics (MM) fitting on subsampled PhAlkEthOH dataset.
==================================================================================

**Open in Google Colab**: http://data.wangyq.net/esp_notebooks/phalkethoh_mm_small.ipynb

This notebook is intended to recover the MM fitting behavior in
https://arxiv.org/abs/2010.01196

To assess how well Espaloma can learn to reproduce an MM force field from a
limited amount of data, we selected a chemical dataset of limited
complexity—PhAlkEthOH—which consists of linear and cyclic molecules
containing phenyl rings, small alkanes, ethers, and alcohols composed of only
the elements carbon, oxygen, and hydrogen. We generated a set of
conformational snapshots for each molecule using short high-temperature
molecular dynamics simulations at 300 K initiated from multiple conformations
to ensure adequate sampling of conformers. The PhAlkEthOH dataset was randomly
partitioned (by molecules) into 80% training, 10% validation, and 10% test
molecules, with 100 snapshots/molecule, and an Espaloma model was trained with
early stopping, monitoring for a decrease in accuracy on the validation set.

.. image:: https://pbs.twimg.com/media/FBL0qACXIBkJLQZ?format=png&name=4096x4096

Installation and imports
------------------------

.. code:: python

    # install conda
    ! pip install -q condacolab
    import condacolab
    condacolab.install()

.. parsed-literal::

    ⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...
    📦 Installing...
    📌 Adjusting configuration...
    🩹 Patching environment...
    ⏲ Done in 0:00:34
    🔁 Restarting kernel...

.. code:: python

    %%capture
    ! mamba install --yes --strict-channel-priority --channel jaimergp/label/unsupported-cudatoolkit-shim --channel omnia --channel omnia/label/cuda100 --channel dglteam --channel numpy openmm openmmtools openmmforcefields rdkit openff-toolkit dgl-cuda10.0 qcportal

.. code:: python

    ! git clone https://github.com/choderalab/espaloma.git

.. parsed-literal::

    Cloning into 'espaloma'...
    remote: Enumerating objects: 7812, done.
    remote: Counting objects: 100% (3634/3634), done.
    remote: Compressing objects: 100% (1649/1649), done.
    remote: Total 7812 (delta 2714), reused 2639 (delta 1900), pack-reused 4178
    Receiving objects: 100% (7812/7812), 13.50 MiB | 11.77 MiB/s, done.
    Resolving deltas: 100% (5538/5538), done.

.. code:: python

    import torch
    import sys
    sys.path.append("/content/espaloma")
    import espaloma as esp

.. parsed-literal::

    Warning: Unable to load toolkit 'OpenEye Toolkit'. The Open Force Field Toolkit does not require the OpenEye Toolkits, and can use RDKit/AmberTools instead. However, if you have a valid license for the OpenEye Toolkits, consider installing them for faster performance and additional file format support: https://docs.eyesopen.com/toolkits/python/quickstart-python/linuxosx.html OpenEye offers free Toolkit licenses for academics: https://www.eyesopen.com/academic-licensing

Load dataset
------------

Here we load the PhAlkEthOH dataset and shuffle it before splitting into
training, validation, and test sets (80%:10%:10%).

.. code:: python

    %%capture
    ! wget http://data.wangyq.net/esp_dataset/phalkethoh_mm_small.zip
    ! unzip phalkethoh_mm_small.zip

.. code:: python

    ds = esp.data.dataset.GraphDataset.load("phalkethoh")
    ds.shuffle(seed=2666)
    ds_tr, ds_vl, ds_te = ds.split([8, 1, 1])

.. parsed-literal::

    DGL backend not selected or invalid. Assuming PyTorch for now.
    Using backend: pytorch

.. parsed-literal::

    Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable. Valid options are: pytorch, mxnet, tensorflow (all lowercase)

A training dataloader is constructed with ``batch_size=100``.

.. code:: python

    ds_tr_loader = ds_tr.view(batch_size=100, shuffle=True)

.. code:: python

    g_tr = next(iter(ds_tr.view(batch_size=len(ds_tr))))
    g_vl = next(iter(ds_vl.view(batch_size=len(ds_vl))))

.. parsed-literal::

    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: From v0.5, DGLHeteroGraph is merged into DGLGraph. You can safely replace dgl.batch_hetero with dgl.batch return warnings.warn(message, category=category, stacklevel=1)

Define model
------------

Define Espaloma stage I: graph -> atom latent representation

.. code:: python

    representation = esp.nn.Sequential(
        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),  # use SAGEConv implementation in DGL
        config=[128, "relu", 128, "relu", 128, "relu"],  # 3 layers, 128 units, ReLU activation
    )

Define Espaloma stages II and III: atom latent representation -> bond, angle,
and torsion representation and parameters. Then compose all three Espaloma
stages into an end-to-end model.

.. code:: python

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=128,
        config=[128, "relu", 128, "relu", 128, "relu"],
        out_features={  # define modular MM parameters Espaloma will assign
            1: {"e": 1, "s": 1},  # atom hardness and electronegativity
            2: {"log_coefficients": 2},  # bond linear combination, enforce positive
            3: {"log_coefficients": 2},  # angle linear combination, enforce positive
            4: {"k": 6},  # torsion barrier heights (can be positive or negative)
        },
    )

    espaloma_model = torch.nn.Sequential(
        representation,
        readout,
        esp.nn.readout.janossy.ExpCoefficients(),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
        esp.mm.energy.EnergyInGraph(suffix="_ref"),
        esp.nn.readout.charge_equilibrium.ChargeEquilibrium(),
    )
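As an illustrative sanity check (not part of the original notebook), a single
forward pass through the untrained model should populate graph-level energies
under ``g.nodes['g'].data['u']``, the same entry the loss below compares
against ``u_ref``:

.. code:: python

    # illustrative check: one forward pass populates per-snapshot energies
    g = next(iter(ds_tr_loader))
    g = espaloma_model(g)
    print(g.nodes['g'].data['u'].shape)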
.. code:: python

    if torch.cuda.is_available():
        espaloma_model = espaloma_model.cuda()

The loss function is specified as the MSE between predicted and reference
energies.

.. code:: python

    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(),  # use mean-squared error loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

Define optimizer
----------------

.. code:: python

    optimizer = torch.optim.Adam(espaloma_model.parameters(), 1e-4)

Train it!
---------

.. code:: python

    for idx_epoch in range(10000):
        for g in ds_tr_loader:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                g = g.to("cuda:0")
            g = espaloma_model(g)
            loss = loss_fn(g)
            loss.backward()
            optimizer.step()
        torch.save(espaloma_model.state_dict(), "%s.th" % idx_epoch)

.. parsed-literal::

    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: From v0.5, DGLHeteroGraph is merged into DGLGraph. You can safely replace dgl.batch_hetero with dgl.batch return warnings.warn(message, category=category, stacklevel=1)
    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: dgl.to_homo is deprecated. Please use dgl.to_homogeneous return warnings.warn(message, category=category, stacklevel=1)

Inspect
-------

.. code:: python

    inspect_metric = esp.metrics.GraphMetric(
        base_metric=torch.nn.L1Loss(),  # use L1 (mean absolute error) loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

.. code:: python

    if torch.cuda.is_available():
        g_vl = g_vl.to("cuda:0")
        g_tr = g_tr.to("cuda:0")

.. code:: python

    loss_tr = []
    loss_vl = []

.. code:: python

    for idx_epoch in range(10000):
        espaloma_model.load_state_dict(
            torch.load("%s.th" % idx_epoch)
        )
        espaloma_model(g_tr)
        loss_tr.append(inspect_metric(g_tr).item())
        espaloma_model(g_vl)
        loss_vl.append(inspect_metric(g_vl).item())

.. parsed-literal::

    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: dgl.to_homo is deprecated. Please use dgl.to_homogeneous return warnings.warn(message, category=category, stacklevel=1)

.. code:: python

    import numpy as np
    loss_tr = np.array(loss_tr) * 627.5  # 627.5: hartree -> kcal/mol
    loss_vl = np.array(loss_vl) * 627.5

.. code:: python

    from matplotlib import pyplot as plt
    plt.plot(loss_tr, label="train")
    plt.plot(loss_vl, label="valid")
    plt.yscale("log")
    plt.legend()

.. image:: mm_fitting_small_files/mm_fitting_small_31_1.png

================================================
FILE: docs/experiments/qm_fitting.rst
================================================
Quantum mechanics (QM) fitting experiment.
==========================================

**Open in Google Colab:** http://data.wangyq.net/esp_notesbooks/qm_fitting.ipynb

This notebook recovers the QM fitting experiment in
https://arxiv.org/abs/2010.01196

|image1|

**Table 2:** Espaloma can directly fit quantum chemical energies to produce a
new molecular mechanics force field with better accuracy than traditional
force fields based on atom typing or direct chemical perception. Espaloma was
fit to quantum chemical potential energies for conformations generated by
optimization trajectories from multiple conformers in various datasets from
QCArchive. All datasets were partitioned by molecules 80:10:10 into
train:validate:test sets. We report the RMSE on training and test sets, as
well as the performance of legacy force fields on the test set.
All statistics are computed with predicted and reference energies centered to
have zero mean for each molecule to focus on errors in relative conformational
energetics, rather than on errors in predicting the heats of formation of
chemical species (which the MM functional form used here is incapable of). The
95% confidence intervals annotated are calculated via bootstrapping molecules
with replacement using 1000 replicates. \*: Six cyclic peptides that cannot be
parametrized using the OpenForceField toolkit engine
:raw-latex:`\cite{openff-toolkit-0.10.0}` are not included.

Since Espaloma can derive a force field solely by fitting to energies (and
optionally gradients), we repeat the end-to-end fitting experiment (see the
notebook http://data.wangyq.net/esp_notebooks/phalkethoh_mm_small.ipynb)
directly using quantum chemical (QM) datasets used to build and evaluate MM
force fields. We assessed the ability of Espaloma to learn several distinct
quantum chemical datasets generated by the Open Force Field Initiative and
deposited in the MolSSI QCArchive:

- **PhAlkEthOH** is a collection of compounds containing only the elements
  carbon, hydrogen, and oxygen in compounds containing phenyl rings, alkanes,
  ketones, and alcohols. Limited in elemental and chemical diversity, this
  dataset is chosen as a proof-of-concept to demonstrate the capability of
  Espaloma to fit and generalize quantum chemical energies when training data
  is sufficient to exhaustively cover the breadth of chemical environments.
- **OpenFF Gen2 Optimization** consists of druglike molecules used in the
  parametrization of the Open Force Field 1.2.0 (“Parsley”) small molecule
  force field. This set was constructed by the Open Force Field Consortium
  from challenging molecule structures provided by Pfizer, Bayer, and Roche,
  along with diverse molecules selected from eMolecules to achieve useful
  coverage of chemical space.
- **VEHICLe**, or *virtual exploratory heterocyclic library*, is a set of
  heteroaromatic ring systems of interest to drug discovery. The atoms in the
  molecules in this dataset have interesting chemical environments in
  heteroaromatic rings that present a challenge to traditional atom typing
  schemes, which cannot easily accommodate the nuanced distinctions in
  chemical environments that lead to perturbations in heterocycle structure.
  We use this dataset to illustrate that Espaloma performs in situations
  challenging to traditional force fields.
- **PepConf** contains a variety of short peptides, including capped, cyclic,
  and disulfide-bonded peptides. This dataset—regenerated using the Open
  Force Field QCSubmit tool—explores the applicability of Espaloma to
  biopolymers, such as proteins.

Since nonbonded terms are generally optimized to fit other condensed-phase
properties, we focused here on optimizing only the valence parameters (bond,
angle, and proper and improper torsion) to fit these gas-phase quantum
chemical datasets, fixing the non-bonded energies using a legacy force field.
Because we are learning an MM force field that is incapable of reproducing
quantum chemical heats of formation reflected as an additive offset in the
quantum chemical energy targets, in both training and test sets, snapshot
energies for each molecule are shifted to have zero mean. All datasets are
randomly shuffled and split (by molecules) into training (80%), validation
(10%), and test (10%) sets.

.. |image1| image:: https://pbs.twimg.com/media/FBL1Gb0WEAYkUhM?format=png&name=4096x4096

Installation and imports
------------------------
.. code:: python

    # install conda
    ! pip install -q condacolab
    import condacolab
    condacolab.install()

.. code:: python

    %%capture
    ! mamba install --yes --strict-channel-priority --channel jaimergp/label/unsupported-cudatoolkit-shim --channel omnia --channel omnia/label/cuda100 --channel dglteam --channel numpy openmm openmmtools openmmforcefields rdkit openff-toolkit dgl-cuda10.0 qcportal

.. code:: python

    ! git clone https://github.com/choderalab/espaloma.git

.. code:: python

    import torch
    import sys
    sys.path.append("/content/espaloma")
    import espaloma as esp

Load dataset
------------

Choose a dataset from ``["gen2", "pepconf", "vehicle", "phalkethoh"]``.

.. code:: python

    dataset_name = "gen2"
    # dataset_name = "pepconf"
    # dataset_name = "vehicle"
    # dataset_name = "phalkethoh"

.. code:: python

    %%capture
    ! wget "data.wangyq.net/esp_dataset/"$dataset_name".zip"
    ! unzip $dataset_name".zip"

.. code:: python

    ds = esp.data.dataset.GraphDataset.load(dataset_name)
    ds.shuffle(seed=2666)
    ds_tr, ds_vl, ds_te = ds.split([8, 1, 1])

Define model
------------

Define Espaloma stage I: graph -> atom latent representation

.. code:: python

    representation = esp.nn.Sequential(
        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),  # use SAGEConv implementation in DGL
        config=[128, "relu", 128, "relu", 128, "relu"],  # 3 layers, 128 units, ReLU activation
    )

Define Espaloma stages II and III: atom latent representation -> bond, angle,
and torsion representation and parameters. Then compose all three Espaloma
stages into an end-to-end model.

.. code:: python

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=128,
        config=[128, "relu", 128, "relu", 128, "relu"],
        out_features={  # define modular MM parameters Espaloma will assign
            1: {"e": 1, "s": 1},  # atom hardness and electronegativity
            2: {"log_coefficients": 2},  # bond linear combination, enforce positive
            3: {"log_coefficients": 2},  # angle linear combination, enforce positive
            4: {"k": 6},  # torsion barrier heights (can be positive or negative)
        },
    )

    espaloma_model = torch.nn.Sequential(
        representation,
        readout,
        esp.nn.readout.janossy.ExpCoefficients(),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
    )

.. code:: python

    if torch.cuda.is_available():
        espaloma_model = espaloma_model.cuda()

The loss function is specified as the MSE between predicted and reference
energies.

.. code:: python

    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(),  # use mean-squared error loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

Define optimizer
----------------

.. code:: python

    optimizer = torch.optim.Adam(espaloma_model.parameters(), 1e-4)

Train it!
---------

.. code:: python

    for idx_epoch in range(10000):
        for g in ds_tr:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                g.heterograph = g.heterograph.to("cuda:0")
            g = espaloma_model(g.heterograph)
            loss = loss_fn(g)
            loss.backward()
            optimizer.step()
        torch.save(espaloma_model.state_dict(), "%s.th" % idx_epoch)

Inspect
-------

.. code:: python

    inspect_metric = esp.metrics.center(torch.nn.L1Loss())  # L1 (mean absolute error) loss, centered per molecule

.. code:: python

    loss_tr = []
    loss_vl = []
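The ``center`` wrapper removes each input's mean before applying the base
metric, matching the zero-mean energy shifting described above. As a
plain-tensor illustration of the idea (this sketch is not espaloma's
implementation):

.. code:: python

    # illustration only: centering makes the metric insensitive to
    # per-molecule constant energy offsets
    import torch

    def centered_l1(u, u_ref):
        return torch.nn.functional.l1_loss(u - u.mean(), u_ref - u_ref.mean())

    u = torch.tensor([1.0, 2.0, 3.0])
    print(centered_l1(u, u + 100.0))  # tensor(0.): the constant offset drops out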
.. code:: python

    with torch.no_grad():
        for idx_epoch in range(10000):
            espaloma_model.load_state_dict(
                torch.load("%s.th" % idx_epoch)
            )

            # training set performance
            u = []
            u_ref = []
            for g in ds_tr:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_tr.append(inspect_metric(u, u_ref))

            # validation set performance
            u = []
            u_ref = []
            for g in ds_vl:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_vl.append(inspect_metric(u, u_ref))

.. code:: python

    import numpy as np
    loss_tr = np.array(loss_tr) * 627.5  # 627.5: hartree -> kcal/mol
    loss_vl = np.array(loss_vl) * 627.5

.. code:: python

    from matplotlib import pyplot as plt
    plt.plot(loss_tr, label="train")
    plt.plot(loss_vl, label="valid")
    plt.yscale("log")
    plt.legend()

================================================
FILE: docs/experiments/typing.rst
================================================
Atom typing recovery experiment.
================================

**Open in Google Colab**: http://data.wangyq.net/esp_notebooks/typing.ipynb
(GPU preferred)

In this notebook, we reproduce the atom typing recovery experiment in `Wang Y,
Fass J, and Chodera JD "End-to-End Differentiable Construction of Molecular
Mechanics Force Fields" <https://arxiv.org/abs/2010.01196>`__ (Section 3:
Graph neural networks can learn to reproduce human-defined legacy atom types
with high accuracy; Figure 3. Graph neural networks can reproduce legacy atom
types with high accuracy.)

.. image:: https://pbs.twimg.com/media/FBLz_6sWUAM2iHa?format=jpg&name=4096x4096

Graph neural networks can reproduce legacy atom types with high accuracy. The
Stage 1 graph neural network of Espaloma chained to a discrete atom type
readout was fit to GAFF 1.81 atom types on a subset of ZINC distributed with
parm@Frosst as a validation set. The 7529 molecules in this set were
partitioned 80:10:10 into training:test:validation sets for this experiment.
The overall test set accuracy was :math:`99.07\%_{98.93\%}^{99.22\%}`, with
1000 bootstrap replicates used to estimate the confidence intervals arising
from finite test set size effects. (a) The distribution of the number of atom
type discrepancies on the test set demonstrates that only a minority of atoms
are incorrectly typed. (b) The error rate per element is primarily
concentrated within carbon, nitrogen, and sulfur types. (c) Examining atom
type failures in detail on molecules with the largest numbers of discrepancies
shows that the atom types are easily confused by a human, since they represent
qualities that are difficult to precisely define. (d) The distribution of
predicted atom types for each reference atom type for carbon types are shown;
on-diagonal values indicate agreement. The percentages annotated under the
x-axis denote the relative abundance within the test set.

Installation and Imports
------------------------

First, we install espaloma and all of its dependencies. Note that this
process will be significantly simplified in the future.

.. code:: python

    %%capture
    ! wget -c https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
    ! bash Miniconda3-latest-Linux-x86_64.sh -b -f -p /usr/local
    ! conda config --add channels conda-forge --add channels omnia --add channels omnia/label/cuda100 --add channels dglteam
    ! conda update --yes --all
    ! conda create --yes -n openmm python=3.6 numpy openmm openmmtools rdkit openforcefield==0.7.0 dgl-cuda10.0 qcportal
    ! git clone https://github.com/choderalab/espaloma.git

.. code:: python

    import torch
    import dgl
    import numpy as np
    import sys
    sys.path.append("/content/espaloma")  # make the cloned espaloma importable
    import espaloma as esp

Get dataset
-----------

.. code:: python

    import os
    if not os.path.exists("zinc"):
        os.system("wget data.wangyq.net/esp_datasets/zinc")
    ds = esp.data.dataset.GraphDataset.load("zinc")

Assign legacy typing
--------------------

Next, we assign legacy typings using the GAFF-1.81 force field.

.. code:: python

    typing = esp.graphs.legacy_force_field.LegacyForceField('gaff-1.81')
    ds.apply(typing, in_place=True)  # this modifies the original data

Data massaging
--------------

We then split the data into training, test, and validation sets (80:10:10)
and batch the datasets.

.. code:: python

    ds_tr, ds_te, ds_vl = ds.split([8, 1, 1])

.. code:: python

    ds_tr = ds_tr.view('graph', batch_size=100, shuffle=True)
    ds_te = ds_te.view('graph', batch_size=100)
    ds_vl = ds_vl.view('graph', batch_size=100)

Defining model
--------------

We define a graph neural network (GNN) model with SAGEConv layers: 128 units,
three layers, and ReLU activation functions.

.. code:: python

    # define a layer
    layer = esp.nn.layers.dgl_legacy.gn("SAGEConv")

    # define a representation
    representation = esp.nn.Sequential(
        layer,
        [128, "relu", 128, "relu", 128, "relu"],
    )

    # define a readout
    readout = esp.nn.readout.node_typing.NodeTyping(
        in_features=128, n_classes=100
    )

    net = torch.nn.Sequential(
        representation,
        readout
    )

Define graph-level loss function
--------------------------------

.. code:: python

    loss_fn = esp.metrics.TypingAccuracy()

Train the model
---------------

.. code:: python

    # define optimizer
    optimizer = torch.optim.Adam(net.parameters(), 1e-5)

    # train the model
    for _ in range(3000):
        for g in ds_tr:
            optimizer.zero_grad()
            net(g.heterograph)
            loss = loss_fn(g.heterograph)
            loss.backward()
            optimizer.step()

================================================
FILE: docs/index.rst
================================================
.. espaloma documentation master file, created by
   sphinx-quickstart on Thu Mar 15 13:55:56 2018.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

espaloma: Extensible Surrogate Potential Optimized by Message-passing Algorithms
================================================================================

Source code for Wang Y, Fass J, and Chodera JD "End-to-End Differentiable
Construction of Molecular Mechanics Force Fields."
https://arxiv.org/abs/2010.01196

.. image:: _static/espaloma_abstract_v2-2.png

Paper Abstract
--------------

Molecular mechanics (MM) potentials have long been a workhorse of
computational chemistry. Leveraging accuracy and speed, these functional forms
find use in a wide variety of applications in biomolecular modeling and drug
discovery, from rapid virtual screening to detailed free energy calculations.
Traditionally, MM potentials have relied on human-curated, inflexible, and
poorly extensible discrete chemical perception rules (*atom types*) for
applying parameters to small molecules or biopolymers, making it difficult to
optimize both types and parameters to fit quantum chemical or physical
property data. Here, we propose an alternative approach that uses *graph
neural networks* to perceive chemical environments, producing continuous atom
embeddings from which valence and nonbonded parameters can be predicted using
invariance-preserving layers.
Since all stages are built from smooth neural functions, the entire
process---spanning chemical perception to parameter assignment---is modular
and end-to-end differentiable with respect to model parameters, allowing new
force fields to be easily constructed, extended, and applied to arbitrary
molecules. We show that this approach is not only sufficiently expressive to
reproduce legacy atom types, but that it can learn and extend existing
molecular mechanics force fields, construct entirely new force fields
applicable to both biopolymers and small molecules from quantum chemical
calculations, and even learn to accurately predict free energies from
experimental observables.

Lab Meeting
-----------

Full video: https://youtu.be/OC210nUuXHk

.. toctree::
   :maxdepth: 2
   :caption: Contents:

   install
   deploy
   experiments/index
   api

Indices and tables
------------------

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

================================================
FILE: docs/install.rst
================================================
Installation
============

mamba
-----

We recommend using ``mamba``, which is a drop-in replacement for ``conda``
and is much faster.

.. code-block:: bash

   $ mamba create --name espaloma -c conda-forge "espaloma=0.3.2"

================================================
FILE: docs/make.bat
================================================
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=malt

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd

================================================
FILE: docs/qm_fitting.rst
================================================
Quantum mechanics (QM) fitting experiment.
==========================================

**Open in Google Colab:** http://data.wangyq.net/esp_notesbooks/qm_fitting.ipynb

This notebook recovers the QM fitting experiment in
https://arxiv.org/abs/2010.01196

|image1|

**Table 2:** Espaloma can directly fit quantum chemical energies to produce a
new molecular mechanics force field with better accuracy than traditional
force fields based on atom typing or direct chemical perception. Espaloma was
fit to quantum chemical potential energies for conformations generated by
optimization trajectories from multiple conformers in various datasets from
QCArchive. All datasets were partitioned by molecules 80:10:10 into
train:validate:test sets. We report the RMSE on training and test sets, as
well as the performance of legacy force fields on the test set. All statistics
are computed with predicted and reference energies centered to have zero mean
for each molecule to focus on errors in relative conformational energetics,
rather than on errors in predicting the heats of formation of chemical species
(which the MM functional form used here is incapable of). The 95% confidence
intervals annotated are calculated via bootstrapping molecules with
replacement using 1000 replicates.
\*: Six cyclic peptides that cannot be parametrized using the OpenForceField
toolkit engine :raw-latex:`\cite{openff-toolkit-0.10.0}` are not included.

Since Espaloma can derive a force field solely by fitting to energies (and
optionally gradients), we repeat the end-to-end fitting experiment (see the
notebook http://data.wangyq.net/esp_notebooks/phalkethoh_mm_small.ipynb)
directly using quantum chemical (QM) datasets used to build and evaluate MM
force fields. We assessed the ability of Espaloma to learn several distinct
quantum chemical datasets generated by the Open Force Field Initiative and
deposited in the MolSSI QCArchive:

- **PhAlkEthOH** is a collection of compounds containing only the elements
  carbon, hydrogen, and oxygen in compounds containing phenyl rings, alkanes,
  ketones, and alcohols. Limited in elemental and chemical diversity, this
  dataset is chosen as a proof-of-concept to demonstrate the capability of
  Espaloma to fit and generalize quantum chemical energies when training data
  is sufficient to exhaustively cover the breadth of chemical environments.
- **OpenFF Gen2 Optimization** consists of druglike molecules used in the
  parametrization of the Open Force Field 1.2.0 (“Parsley”) small molecule
  force field. This set was constructed by the Open Force Field Consortium
  from challenging molecule structures provided by Pfizer, Bayer, and Roche,
  along with diverse molecules selected from eMolecules to achieve useful
  coverage of chemical space.
- **VEHICLe**, or *virtual exploratory heterocyclic library*, is a set of
  heteroaromatic ring systems of interest to drug discovery. The atoms in the
  molecules in this dataset have interesting chemical environments in
  heteroaromatic rings that present a challenge to traditional atom typing
  schemes, which cannot easily accommodate the nuanced distinctions in
  chemical environments that lead to perturbations in heterocycle structure.
  We use this dataset to illustrate that Espaloma performs in situations
  challenging to traditional force fields.
- **PepConf** contains a variety of short peptides, including capped, cyclic,
  and disulfide-bonded peptides. This dataset—regenerated using the Open
  Force Field QCSubmit tool—explores the applicability of Espaloma to
  biopolymers, such as proteins.

Since nonbonded terms are generally optimized to fit other condensed-phase
properties, we focused here on optimizing only the valence parameters (bond,
angle, and proper and improper torsion) to fit these gas-phase quantum
chemical datasets, fixing the non-bonded energies using a legacy force field.
Because we are learning an MM force field that is incapable of reproducing
quantum chemical heats of formation reflected as an additive offset in the
quantum chemical energy targets, in both training and test sets, snapshot
energies for each molecule are shifted to have zero mean. All datasets are
randomly shuffled and split (by molecules) into training (80%), validation
(10%), and test (10%) sets.

.. |image1| image:: https://pbs.twimg.com/media/FBL1Gb0WEAYkUhM?format=png&name=4096x4096

Installation and imports
------------------------

.. code:: python

    # install conda
    ! pip install -q condacolab
    import condacolab
    condacolab.install()

.. code:: python

    %%capture
    ! mamba install --yes --strict-channel-priority --channel jaimergp/label/unsupported-cudatoolkit-shim --channel omnia --channel omnia/label/cuda100 --channel dglteam --channel numpy openmm openmmtools openmmforcefields rdkit openff-toolkit dgl-cuda10.0 qcportal

.. code:: python
    ! git clone https://github.com/choderalab/espaloma.git

.. code:: python

    import torch
    import sys
    sys.path.append("/content/espaloma")
    import espaloma as esp

Load dataset
------------

Choose a dataset from ``["gen2", "pepconf", "vehicle", "phalkethoh"]``.

.. code:: python

    dataset_name = "gen2"
    # dataset_name = "pepconf"
    # dataset_name = "vehicle"
    # dataset_name = "phalkethoh"

.. code:: python

    %%capture
    ! wget "data.wangyq.net/esp_dataset/"$dataset_name".zip"
    ! unzip $dataset_name".zip"

.. code:: python

    ds = esp.data.dataset.GraphDataset.load(dataset_name)
    ds.shuffle(seed=2666)
    ds_tr, ds_vl, ds_te = ds.split([8, 1, 1])

Define model
------------

Define Espaloma stage I: graph -> atom latent representation

.. code:: python

    representation = esp.nn.Sequential(
        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),  # use SAGEConv implementation in DGL
        config=[128, "relu", 128, "relu", 128, "relu"],  # 3 layers, 128 units, ReLU activation
    )

Define Espaloma stages II and III: atom latent representation -> bond, angle,
and torsion representation and parameters. Then compose all three Espaloma
stages into an end-to-end model.

.. code:: python

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=128,
        config=[128, "relu", 128, "relu", 128, "relu"],
        out_features={  # define modular MM parameters Espaloma will assign
            1: {"e": 1, "s": 1},  # atom hardness and electronegativity
            2: {"log_coefficients": 2},  # bond linear combination, enforce positive
            3: {"log_coefficients": 2},  # angle linear combination, enforce positive
            4: {"k": 6},  # torsion barrier heights (can be positive or negative)
        },
    )

    espaloma_model = torch.nn.Sequential(
        representation,
        readout,
        esp.nn.readout.janossy.ExpCoefficients(),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
    )

.. code:: python

    if torch.cuda.is_available():
        espaloma_model = espaloma_model.cuda()

The loss function is specified as the MSE between predicted and reference
energies.

.. code:: python

    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(),  # use mean-squared error loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

Define optimizer
----------------

.. code:: python

    optimizer = torch.optim.Adam(espaloma_model.parameters(), 1e-4)

Train it!
---------

.. code:: python

    for idx_epoch in range(10000):
        for g in ds_tr:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                g.heterograph = g.heterograph.to("cuda:0")
            g = espaloma_model(g.heterograph)
            loss = loss_fn(g)
            loss.backward()
            optimizer.step()
        torch.save(espaloma_model.state_dict(), "%s.th" % idx_epoch)

Inspect
-------

.. code:: python

    inspect_metric = esp.metrics.center(torch.nn.L1Loss())  # L1 (mean absolute error) loss, centered per molecule

.. code:: python

    loss_tr = []
    loss_vl = []

.. code:: python

    with torch.no_grad():
        for idx_epoch in range(10000):
            espaloma_model.load_state_dict(
                torch.load("%s.th" % idx_epoch)
            )

            # training set performance
            u = []
            u_ref = []
            for g in ds_tr:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_tr.append(inspect_metric(u, u_ref))

            # validation set performance
            u = []
            u_ref = []
            for g in ds_vl:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_vl.append(inspect_metric(u, u_ref))

..
code:: python import numpy as np loss_tr = np.array(loss_tr) * 627.5 loss_vl = np.array(loss_vl) * 627.5 .. code:: python from matplotlib import pyplot as plt plt.plot(loss_tr, label="train") plt.plot(loss_vl, label="valid") plt.yscale("log") plt.legend() ================================================ FILE: espaloma/.py ================================================ ================================================ FILE: espaloma/__init__.py ================================================ """ espaloma Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm """ from . import metrics, units, data, app, graphs, mm, nn from .app.experiment import * from .graphs.graph import Graph from .metrics import GraphMetric from .mm.geometry import * from .utils.model_fetch import get_model, get_model_path # Add imports here # import espaloma # Handle versioneer from ._version import get_versions # # from openff.toolkit.utils.toolkits import ToolkitRegistry, OpenEyeToolkitWrapper, RDKitToolkitWrapper, AmberToolsToolkitWrapper # toolkit_registry = ToolkitRegistry() # toolkit_precedence = [ RDKitToolkitWrapper ] # , OpenEyeToolkitWrapper, AmberToolsToolkitWrapper] # [ toolkit_registry.register_toolkit(toolkit) for toolkit in toolkit_precedence if toolkit.is_available() ] # versions = get_versions() __version__ = versions["version"] __git_revision__ = versions["full-revisionid"] del get_versions, versions from . import _version __version__ = _version.get_versions()['version'] ================================================ FILE: espaloma/_version.py ================================================ # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. # Generated by versioneer-0.29 # https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" import errno import os import re import subprocess import sys from typing import Any, Callable, Dict, List, Optional, Tuple import functools def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "$Format:%d$" git_full = "$Format:%H$" git_date = "$Format:%ci$" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str parentdir_prefix: str versionfile_source: str verbose: bool def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "None" cfg.versionfile_source = "espaloma/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY: Dict[str, str] = {} HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command( commands: List[str], args: List[str], cwd: Optional[str] = None, verbose: bool = False, hide_stderr: bool = False, env: Optional[Dict[str, str]] = None, ) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW popen_kwargs["startupinfo"] = startupinfo for command in commands: try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break except OSError as e: if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, process.returncode return stdout, process.returncode def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. 
git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). 
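(Note, added for clarity: as implemented below the function returns None, not -1, when no post-release segment is present; e.g. "1.2.3.post4" splits into ("1.2.3", 4), while "1.2.3" yields ("1.2.3", None).)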
""" vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ================================================ FILE: espaloma/app/__init__.py ================================================ from . import experiment, report ================================================ FILE: espaloma/app/experiment.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import copy import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class Experiment(abc.ABC): """Base class for espaloma experiment.""" def __init__(self): super(Experiment, self).__init__() class Train(Experiment): """Training experiment. 
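A minimal usage sketch (illustrative, not part of the original docstring; assumes a `net` and a `GraphDataset` `ds` built as in `espaloma.app.train`, with graphs already carrying the reference labels required by the default metrics): >>> exp = Train(net=net, data=ds.view("graph", batch_size=8), n_epochs=10) >>> trained_net = exp.train() # weights land in `exp.states` every `record_interval` epochs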
Parameters ---------- net : `torch.nn.Module` Neural networks that inputs graph representation and outputs parameterized or typed graph for molecular mechanics. data : `espaloma.data.dataset.Dataset` or `torch.utils.data.DataLoader` Dataset. metrics : `List` of `callable` List of loss functions to be used (summed) in training. optimizer : `torch.optim.Optimizer` Optimizer used for training. n_epochs : `int` Number of epochs. record_interval : `int` Interval at which states are recorded. Methods ------- train_once : Train the network for exactly once. train : Execute `train_once` for `n_epochs` times and record states every `record_interval`. """ def __init__( self, net, data, metrics=[esp.metrics.TypingCrossEntropy()], optimizer=lambda net: torch.optim.Adam(net.parameters(), 1e-3), n_epochs=100, record_interval=1, normalize=esp.data.normalize.ESOL100LogNormalNormalize, scheduler=None, device=torch.device("cpu"), ): super(Train, self).__init__() # bookkeeping self.device = device if isinstance(net, torch.nn.DataParallel): self.net = net elif isinstance(net, torch.nn.parallel.DistributedDataParallel): self.net = net else: self.net = net.to(self.device) self.data = data self.metrics = metrics self.n_epochs = n_epochs self.record_interval = record_interval self.normalize = normalize() self.states = {} self.scheduler = scheduler # make optimizer if callable(optimizer): self.optimizer = optimizer(net) else: self.optimizer = optimizer # compose loss function def loss(g): _loss = 0.0 for metric in self.metrics: _loss += metric(g) return _loss self.loss = loss def train_once(self): """Train the model for one batch.""" for idx, g in enumerate( self.data ): # TODO: does this have to be a single g? if isinstance(self.optimizer, torch.optim.LBFGS): retain_graph = True else: retain_graph = False g = g.to(self.device) self.net.train() def closure(g=g): self.optimizer.zero_grad() g = self.net(g) g = self.normalize.unnorm(g) loss = self.loss(g) loss.backward(retain_graph=retain_graph) if idx == 0: if torch.isnan(loss).cpu().numpy().item() is True: raise RuntimeError("Loss is Nan.") return loss loss = closure() self.optimizer.step() if self.scheduler is not None: self.scheduler.step(loss) def train(self): """Train the model for multiple steps and record the weights once every `record_interval` """ for epoch_idx in range(int(self.n_epochs)): self.train_once() # record when `record_interval` is hit if epoch_idx % self.record_interval == 0: self.states[epoch_idx] = copy.deepcopy(self.net.state_dict()) # record final state self.states["final"] = copy.deepcopy(self.net.state_dict()) return self.net class Test(Experiment): """Test experiment. Parameters ---------- net : `torch.nn.Module` Neural networks that inputs graph representation and outputs parameterized or typed graph for molecular mechanics. data : `espaloma.data.dataset.Dataset` or `torch.utils.data.DataLoader` Dataset. metrics : `List` of `callable` List of loss functions to be used (summed) in training. 
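states : `dict` Mapping from epoch index (or "final") to model state dicts, as recorded by `Train.train`. A usage sketch (illustrative, not part of the original docstring): >>> test = Test(net=net, data=ds_te, states=train.states) >>> test.test() >>> test.results # {metric_name: {state_name: value}}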
""" def __init__( self, net, data, states, metrics=[esp.metrics.TypingCrossEntropy()], normalize=esp.data.normalize.NotNormalize, sampler=None, device=torch.device("cpu"), # it should cpu ): # bookkeeping self.device = device self.net = net.to(self.device) self.data = data self.states = states self.metrics = metrics self.sampler = sampler self.normalize = normalize() def test(self): """Run tests.""" results = {} # loop through the metrics for metric in self.metrics: results[metric.__name__] = {} # NOTE: we are not doing this here since this will lead to OOM # from time to time # make it just one giant graph # g = list(self.data) # g = dgl.batch(g) # g = g.to(self.device) if self.states is None: self.states = {"final": None} for state_name, state in self.states.items(): # loop through states if state is not None: # load the state dict self.net.load_state_dict(state) self.net.eval() for metric in self.metrics: assert isinstance(metric, esp.metrics.Metric) input_fn, target_fn = metric.between inputs = [] targets = [] for g in self.data: with g.local_scope(): g = g.to(self.device) g_input = self.normalize.unnorm(self.net(g)) inputs.append(input_fn(g_input).detach()) targets.append(target_fn(g_input).detach()) inputs = torch.cat(inputs, dim=0) targets = torch.cat(targets, dim=0) # loop through the metrics results[metric.__name__][state_name] = ( metric.base_metric(inputs, targets).detach().cpu().numpy() ) self.ref_g = self.normalize.unnorm(self.net(g)).to( torch.device("cpu") ) for term in self.ref_g.ntypes: for param in self.ref_g.nodes[term].data.keys(): g.nodes[term].data[param] = g.nodes[term].data[param].detach() # point this to self self.results = results class TrainAndTest(Experiment): """Train a model and then test it.""" def __init__( self, net, ds_tr, ds_te, ds_vl=None, metrics_tr=[esp.metrics.TypingCrossEntropy()], metrics_te=[esp.metrics.TypingCrossEntropy()], optimizer=lambda net: torch.optim.Adam(net.parameters(), 1e-2), normalize=esp.data.normalize.NotNormalize, n_epochs=100, record_interval=1, device=torch.device("cpu"), scheduler=None, ): # bookkeeping self.device = device self.net = net self.ds_tr = ds_tr self.ds_te = ds_te self.ds_vl = ds_vl self.optimizer = optimizer self.n_epochs = n_epochs self.metrics_tr = metrics_tr self.metrics_te = metrics_te self.normalize = normalize self.record_interval = record_interval self.scheduler = scheduler def __str__(self): _str = "" _str += "# model" _str += "\n" _str += str(self.net) _str += "\n" if hasattr(self.net, "noise_model"): _str += "# noise model" _str += "\n" _str += str(self.net.noise_model) _str += "\n" _str += "# optimizer" _str += "\n" _str += str(self.optimizer) _str += "\n" _str += "# n_epochs" _str += "\n" _str += str(self.n_epochs) _str += "\n" return _str def run(self): """Run train and test.""" train = Train( net=self.net, data=self.ds_tr, optimizer=self.optimizer, n_epochs=self.n_epochs, metrics=self.metrics_tr, normalize=self.normalize, device=self.device, record_interval=self.record_interval, scheduler=self.scheduler, ) train.train() self.states = train.states test = Test( net=self.net, data=self.ds_te, metrics=self.metrics_te, states=self.states, normalize=self.normalize, device=self.device, ) test.test() self.ref_g_test = test.ref_g self.results_te = test.results test = Test( net=self.net, data=self.ds_tr, metrics=self.metrics_te, states=self.states, normalize=self.normalize, device=self.device, ) test.test() self.ref_g_training = test.ref_g self.results_tr = test.results if self.ds_vl is not None: test = Test( 
net=self.net, data=self.ds_vl, metrics=self.metrics_te, states=self.states, normalize=self.normalize, device=self.device, ) test.test() self.ref_g_validation = test.ref_g self.results_vl = test.results return { "test": self.results_te, "train": self.results_tr, "validate": self.results_vl, } return {"test": self.results_te, "train": self.results_tr} ================================================ FILE: espaloma/app/report.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import pandas as pd # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def dataframe(results_dict): # get all the results metrics = list(list(results_dict.values())[0].keys()) ds_names = list(results_dict.keys()) df = pd.DataFrame( [ [value["final"].round(4) for metric, value in results.items()] for ds_name, results in results_dict.items() ], columns=metrics, index=ds_names, ) return df def curve(results_dict): curve_dict = {} # get all the results metrics = list(list(results_dict.values())[0].keys()) # loop through metrics for idx_metric, metric in enumerate(metrics): # loop through the results for ds_name, results in results_dict.items(): # get all the recorded indices idxs = list( [ key for key in results[metric].keys() if isinstance(key, int) ] ) curve_dict[(metric, ds_name)] = np.array( [results[metric][idx] for idx in idxs] ) return curve_dict def markdown(results_dict): df = dataframe(results_dict) return df.transpose().to_markdown() def visual(results_dict): # make plots less ugly from matplotlib import pyplot as plt plt.rc("font", size=14) plt.rc("lines", linewidth=6) # initialize the figure fig = plt.figure(figsize=(8, 3)) # get all the results metrics = list(list(results_dict.values())[0].keys()) n_metrics = len(metrics) # loop through metrics for idx_metric, metric in enumerate(metrics): ax = plt.subplot(1, n_metrics, idx_metric + 1) # loop through the results for ds_name, results in results_dict.items(): # get all the recorded indices idxs = list( [ key for key in results[metric].keys() if isinstance(key, int) ] ) # sort it ascending idxs.sort() ax.plot( idxs, [results[metric][idx] for idx in idxs], label=ds_name ) ax.set_xlabel("epochs") ax.set_ylabel(metric) plt.tight_layout() plt.legend() return fig def visual_multiple(results_dicts): from matplotlib import cm as cm from matplotlib import pyplot as plt plt.rc("font", size=14) plt.rc("lines", linewidth=4) # initialize the figure fig = plt.figure() # get all the results metrics = list(list(results_dicts[0][1].values())[0].keys()) n_metrics = len(metrics) # loop through metrics for idx_metric, metric in enumerate(metrics): ax = plt.subplot(n_metrics, 1, idx_metric + 1) # loop through results for idx_result, config_and_results_dict in enumerate(results_dicts): config, results_dict = config_and_results_dict for ds_name, results in results_dict.items(): # get all the recorded indices idxs = list( [ key for key in results[metric].keys() if isinstance(key, int) ] ) # sort it ascending idxs.sort() label = None linestyle = "dotted" if ds_name == "training": label = config["#"] linestyle = "solid" ax.plot( idxs, [results[metric][idx] for idx in idxs], label=label, c=cm.gist_rainbow( (float(idx_result) / len(results_dicts)) ), linestyle=linestyle, 
alpha=0.8, ) ax.set_xlabel("epochs") ax.set_ylabel(metric) plt.legend(bbox_to_anchor=(1.04, 0), loc="lower left") plt.tight_layout() return fig def visual_base64(results_dict): fig = visual(results_dict) import base64 import io img = io.BytesIO() fig.savefig(img, format="png", dpi=50) img.seek(0) img = base64.b64encode(img.read()).decode("utf-8") # img = "![img](data:image/png;base64%s)" % img return img def html(results_dict): html_string = "" if isinstance(results_dict, dict): results_dict = [results_dict] for _results_dict in results_dict: html_string += """ <p> <img src="data:image/png;base64,%s"/> <br/> %s </p> """ % ( visual_base64(_results_dict)[:-1], dataframe(_results_dict).to_html(), ) return html_string
def html_multiple_train_and_test(results): html_string = "" for param, result in results: html_string += "<p>" + str(param) + "</p>" html_string += html(result) html_string += "<br/>" return html_string
def html_multiple_train_and_test_2d_grid(results): # make sure there are only two parameter types import copy results = copy.deepcopy(results) for result in results: result[0].pop("#") param_names = list(results[0][0].keys()) assert len(param_names) == 2 param_col_name, param_row_name = param_names param_col_values = list( set([result[0][param_col_name] for result in results]) ) param_row_values = list( set([result[0][param_row_name] for result in results]) ) param_col_values.sort() param_row_values.sort() # initialize giant table in nested lists table = [["NA" for _ in param_col_values] for _ in param_row_values] # populate this table for idx_col, param_col in enumerate(param_col_values): for idx_row, param_row in enumerate(param_row_values): param_dict = { param_col_name: param_col, param_row_name: param_row, } # TODO: # make this less ugly for result in results: if result[0] == param_dict: table[idx_row][idx_col] = html(result[1]) html_string = "" html_string += "<table>" # first row html_string += "<tr>" html_string += ( "<td>" + param_row_name + "/" + param_col_name + "</td>" ) for param_col in param_col_values: html_string += ( "<td>" + str(param_col) + "</td>" ) html_string += "</tr>" # the rest of the rows for idx_row, param_row in enumerate(param_row_values): html_string += "<tr>" html_string += ( "<td>" + param_row + "</td>" ) for idx_col, param_col in enumerate(param_col_values): html_string += ( "<td>" + table[idx_row][idx_col] + "</td>" ) html_string += "</tr>" html_string += "</table>
" return html_string ================================================ FILE: espaloma/app/tests/test_experiment.py ================================================ import pytest import torch def test_import(): import espaloma as esp esp.app.experiment @pytest.fixture def data(): import espaloma as esp esol = esp.data.esol(first=20) # do some typing typing = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") esol.apply(typing, in_place=True) # this modify the original data return esol.view("graph", batch_size=10) @pytest.fixture def net(): import espaloma as esp # define a layer layer = esp.nn.layers.dgl_legacy.gn("GraphConv") # define a representation representation = esp.nn.Sequential( layer, [32, "tanh", 32, "tanh", 32, "tanh"] ) # define a readout readout = esp.nn.readout.node_typing.NodeTyping( in_features=32, n_classes=100 ) # not too many elements here I think? net = torch.nn.Sequential(representation, readout) return net def test_data_and_net(data, net): data net @pytest.fixture def train(data, net): import espaloma as esp train = esp.app.experiment.Train( net=net, data=data, n_epochs=1, metrics=[ esp.metrics.GraphMetric( base_metric=torch.nn.CrossEntropyLoss(), between=["nn_typing", "legacy_typing"], ) ], ) return train def test_train(train): train.train() def test_test(train, net, data): import espaloma as esp train.train() test = esp.app.experiment.Test(net=net, data=data, states=train.states) def test_train_and_test(net, data): import espaloma as esp train_and_test = esp.app.experiment.TrainAndTest( net=net, n_epochs=1, ds_tr=data, ds_te=data ) ================================================ FILE: espaloma/app/train.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import os import numpy as np import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = getattr(forcefield, args.operation) # apply to dataset data = data.apply(operation, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] # readout if args.readout == "node_typing": readout = esp.nn.readout.node_typing.NodeTyping( in_features=units, n_classes=args.n_classes ) if args.readout == "janossy": readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config ) net = torch.nn.Sequential(representation, readout) training_metrics = [ getattr(esp.metrics, metric)() for metric in args.training_metrics ] test_metrics = [ getattr(esp.metrics, metric)() for metric in args.test_metrics ] exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=[ getattr(esp.metrics, metric)() for metric in args.training_metrics ], metrics_te=[ getattr(esp.metrics, metric)() for metric in args.test_metrics ], n_epochs=args.n_epochs, ) results = exp.run() print(esp.app.report.markdown(results)) if __name__ == "__main__": import 
argparse parser = argparse.ArgumentParser() parser.add_argument("--data", default="esol", type=str) parser.add_argument("--first", default=-1, type=int) parser.add_argument("--readout", default="node_typing", type=str) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument("--forcefield", default="gaff-1.81", type=str) parser.add_argument("--operation", default="typing", type=str) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) args = parser.parse_args() run(args) ================================================ FILE: espaloma/app/train_all_params.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import numpy as np import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = forcefield.parametrize # apply to dataset data = data.apply(operation, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config, out_features={ 2: ["k", "eq"], 3: ["k", "eq"], }, ) net = torch.nn.Sequential(representation, readout) metrics_tr = [ esp.metrics.GraphMetric( base_metric=torch.nn.L1Loss(), between=[param, param + "_ref"], level=term, ) for param in ["k", "eq"] for term in ["n2", "n3"] ] metrics_te = [ esp.metrics.GraphMetric( base_metric=base_metric, between=[param, param + "_ref"], level=term, ) for param in ["k", "eq"] for term in ["n2", "n3"] for base_metric in [esp.metrics.rmse, esp.metrics.r2] ] exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, n_epochs=args.n_epochs, ) results = exp.run() print(esp.app.report.markdown(results)) import os os.mkdir(args.out) with open(args.out + "/architecture.txt", "w") as f_handle: f_handle.write(str(exp)) with open(args.out + "/result_table.md", "w") as f_handle: f_handle.write(esp.app.report.markdown(results)) curves = esp.app.report.curve(results) for spec, curve in curves.items(): np.save(args.out + "/" + "_".join(spec) + ".npy", curve) import pickle with open(args.out + "/ref_g_test.th", "wb") as f_handle: pickle.dump(exp.ref_g_test, f_handle) with open(args.out + "/ref_g_training.th", "wb") as f_handle: pickle.dump(exp.ref_g_training, f_handle) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() 
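# Example invocation (illustrative; flag values taken from the argparse defaults declared below):
#   python espaloma/app/train_all_params.py --data alkethoh --first 100 --forcefield smirnoff99Frosst-1.1.0 --n_epochs 50 --out results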
parser.add_argument("--data", default="alkethoh", type=str) parser.add_argument("--out", default="results", type=str) parser.add_argument("--first", default=-1, type=int) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument( "--forcefield", default="smirnoff99Frosst-1.1.0", type=str ) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) args = parser.parse_args() run(args) ================================================ FILE: espaloma/app/train_bonded_energy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import os import numpy as np import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = forcefield.parametrize # apply to dataset data = data.apply(operation, in_place=True) # apply simulation # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation( n_samples=1000, n_steps_per_sample=10 ) data = data.apply(simulation.run, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config, ) net = torch.nn.Sequential( representation, readout, esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(), esp.mm.energy.EnergyInGraph(suffix="_ref"), ) metrics_tr = [ esp.metrics.GraphMetric( base_metric=torch.nn.L1Loss(), between=["u", "u_ref"], level="g" ) ] metrics_te = [ esp.metrics.GraphMetric( base_metric=base_metric, between=[param, param + "_ref"], level=term, ) for param in ["u"] for term in ["g"] for base_metric in [esp.metrics.rmse, esp.metrics.r2] ] optimizer = getattr(torch.optim, args.optimizer)( net.parameters(), lr=args.lr ) exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, n_epochs=args.n_epochs, normalize=esp.data.normalize.PositiveNotNormalize, ) results = exp.run() print(esp.app.report.markdown(results)) import os os.mkdir(args.out) with open(args.out + "/architecture.txt", "w") as f_handle: f_handle.write(str(exp)) with open(args.out + "/result_table.md", "w") as f_handle: f_handle.write(esp.app.report.markdown(results)) curves = esp.app.report.curve(results) for spec, curve in curves.items(): np.save(args.out + "/" + "_".join(spec) + ".npy", curve) import pickle with open(args.out + 
"/ref_g_test.th", "wb") as f_handle: pickle.dump(exp.ref_g_test, f_handle) with open(args.out + "/ref_g_training.th", "wb") as f_handle: pickle.dump(exp.ref_g_training, f_handle) print(esp.app.report.markdown(results)) import pickle with open(args.out + "/ref_g_test.th", "wb") as f_handle: pickle.dump(exp.ref_g_test, f_handle) with open(args.out + "/ref_g_training.th", "wb") as f_handle: pickle.dump(exp.ref_g_training, f_handle) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--data", default="alkethoh", type=str) parser.add_argument("--first", default=-1, type=int) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument( "--forcefield", default="smirnoff99Frosst-1.1.0", type=str ) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--out", default="results", type=str) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) parser.add_argument("--optimizer", default="Adam", type=str) parser.add_argument("--lr", default=1e-3, type=float) args = parser.parse_args() run(args) ================================================ FILE: espaloma/app/train_multi_typing.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = forcefield.multi_typing # apply to dataset data = data.apply(operation, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config, out_features={ 1: {"nn_typing": 100}, 2: {"nn_typing": 100}, 3: {"nn_typing": 100}, }, ) net = torch.nn.Sequential(representation, readout) metrics_tr = [ esp.metrics.GraphMetric( base_metric=torch.nn.CrossEntropyLoss(), between=["nn_typing", "legacy_typing"], level=term, ) for term in ["n1", "n2", "n3"] ] metrics_te = [ esp.metrics.GraphMetric( base_metric=esp.metrics.accuracy, between=["nn_typing", "legacy_typing"], level=term, ) for term in ["n1", "n2", "n3"] ] exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, n_epochs=args.n_epochs, ) results = exp.run() print(esp.app.report.markdown(results)) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--data", default="alkethoh", type=str) 
parser.add_argument("--first", default=-1, type=int) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument( "--forcefield", default="smirnoff99Frosst-1.1.0", type=str ) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) args = parser.parse_args() run(args) ================================================ FILE: espaloma/data/__init__.py ================================================ """ Handles the dataset and collections of espaloma. """ from . import dataset, md, normalize, utils, qcarchive_utils, md17_utils from .collection import * ================================================ FILE: espaloma/data/collection.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= def esol(*args, **kwargs): """ESOL collection. ..[1] ESOL: Estimating Aqueous Solubility Directly from Molecular Structure John S. Delaney Journal of Chemical Information and Computer Sciences 2004 44 (3), 1000-1005 DOI: 10.1021/ci034243x """ import os import pandas as pd path = os.path.dirname(esp.__file__) + "/data/esol.csv" df = pd.read_csv(path) smiles = df.iloc[:, -1] return esp.data.dataset.GraphDataset(smiles, *args, **kwargs) def alkethoh(*args, **kwargs): """AlkEthOH collection. ..[1] Open Force Field Consortium: Escaping atom types using direct chemical perception with SMIRNOFF v0.1 David L. Mobley, Caitlin C. Bannan, Andrea Rizzi, Christopher I. Bayly, John D. Chodera, Victoria T. Lim, Nathan M. Lim, Kyle A. Beauchamp, Michael R. Shirts, Michael K. Gilson, Peter K. Eastman bioRxiv 286542; doi: https://doi.org/10.1101/286542 """ import os import pandas as pd df = pd.concat( [ pd.read_csv( "https://raw.githubusercontent.com/openforcefield/" "open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/" "AlkEthOH_rings.smi", header=None, ), pd.read_csv( "https://raw.githubusercontent.com/openforcefield/" "open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/" "AlkEthOH_chain.smi", header=None, ), ], axis=0, ) smiles = df.iloc[:, 0].values return esp.data.dataset.GraphDataset(smiles, *args, **kwargs) def zinc(first=-1, *args, **kwargs): """ZINC collection. ..[1] Irwin, John J, and Brian K Shoichet. “ZINC --a free database of commercially available compounds for virtual screening.” Journal of chemical information and modeling vol. 45,1 (2005): 177-82.
doi:10.1021/ci049714+ """ import tarfile from os.path import exists from openff.toolkit.topology import Molecule from rdkit import Chem fname = "parm_at_Frosst.tgz" url = "http://www.ccl.net/cca/data/parm_at_Frosst/parm_at_Frosst.tgz" if not exists(fname): import urllib.request urllib.request.urlretrieve(url, fname) archive = tarfile.open(fname) zinc_file = archive.extractfile("parm_at_Frosst/zinc.sdf") _mols = Chem.ForwardSDMolSupplier(zinc_file, removeHs=False) count = 0 gs = [] for mol in _mols: try: gs.append( esp.Graph( Molecule.from_rdkit(mol, allow_undefined_stereo=True) ) ) count += 1 except: pass if first != -1 and count >= first: break return esp.data.dataset.GraphDataset(gs, *args, **kwargs) def md17_old(*args, **kwargs): return [ esp.data.md17_utils.get_molecule(name, *args, **kwargs) for name in [ "benzene", "uracil", "naphthalene", "aspirin", "salicylic", "malonaldehyde", "ethanol", "toluene", "paracetamol", "azobenzene", ] ] def md17_new(*args, **kwargs): return [ esp.data.md17_utils.get_molecule(name, *args, **kwargs).heterograph for name in [ "paracetamol", "azobenzene", "benzene", "ethanol", ] ] class qca(object): pass df_names = [ "Bayer", "Coverage", "eMolecules", "Pfizer", "Roche", "Benchmark", "fda", ] def _get_ds(cls, df_name): import os import pandas as pd path = os.path.dirname(esp.__file__) + "/../data/qca/%s.h5" % df_name df = pd.read_hdf(path) ds = esp.data.qcarchive_utils.h5_to_dataset(df) return ds from functools import partial for df_name in df_names: setattr( qca, df_name.lower(), classmethod(partial(_get_ds, df_name=df_name)), ) ================================================ FILE: espaloma/data/dataset.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class Dataset(abc.ABC, torch.utils.data.Dataset): """The base class of map-style dataset. Parameters ---------- graphs : List objects in the dataset Methods ------- shuffle Randomly shuffle the graphs in the dataset. apply(fn, in_place=True) Apply a function to every graph in the dataset. If `in_place=True`, modify the graph in-place. split(partitions) Split the dataset into partitions subsample(ratio, seed=None) Subsample the dataset. save(path) Save the dataset to a local path. load(path) Load a dataset from local path. Note ---- This also supports iterative-style dataset by deleting `__getitem__` and `__len__` function. Attributes ---------- transforms : an iterable of callables that transforms the input. the `__getiem__` method applies these transforms later. 
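For instance (illustrative), `ds.apply(fn, in_place=False)` appends `fn` to `transforms`, and `__getitem__` then applies it lazily, so `ds[0]` returns `fn` applied to the stored graph.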
Examples -------- >>> data = Dataset([esp.Graph("C")]) """ def __init__(self, graphs=None): super(Dataset, self).__init__() self.graphs = graphs self.transforms = None def __len__(self): # 0 len if no graphs if self.graphs is None: return 0 else: return len(self.graphs) def __getitem__(self, idx): if self.graphs is None: raise RuntimeError("Empty molecule dataset.") if isinstance(idx, int): # sinlge element if self.transforms is None: # when no transform act like list return self.graphs[idx] else: graph = self.graphs[idx] # nested transforms for transform in self.transforms: graph = transform(graph) return graph elif isinstance(idx, slice): # implement slicing if self.transforms is None: # return a Dataset object rather than list return self.__class__(graphs=self.graphs[idx]) else: graphs = [] for graph in self.graphs[idx]: # nested transforms for transform in self.transforms: graph = transform(graph) graphs.append(graph) return self.__class__(graphs=graphs) elif isinstance(idx, list): # implement slicing if self.transforms is None: # return a Dataset object rather than list return self.__class__( graphs=[self.graphs[_idx] for _idx in idx] ) else: graphs = [] for _idx in idx: graph = self[_idx] # nested transforms for transform in self.transforms: graph = transform(graph) graphs.append(graph) return self.__class__(graphs=graphs) def __iter__(self): if self.transforms is None: return iter(self.graphs) else: # TODO: # is this efficient? graphs = iter(self.graphs) for transform in self.transforms: graphs = map(transform, graphs) return graphs def shuffle(self, seed=None): import random from random import shuffle if seed is not None: random.seed(seed) shuffle(self.graphs) return self def apply(self, fn, in_place=False): r"""Apply functions to the elements of the dataset. Parameters ---------- fn : callable Note ---- If in_place is False, `fn` is added to the `transforms` else it is applied to elements and modifies them. """ assert callable(fn) assert isinstance(in_place, bool) if in_place is False: # add to list of transforms if self.transforms is None: self.transforms = [] self.transforms.append(fn) else: # modify in-place # self.graphs = list(map(fn, self.graphs)) _graphs = [] for graph in self.graphs: try: _graphs.append(fn(graph)) except: pass self.graphs = _graphs return self # to allow grammar: ds = ds.apply(...) def split(self, partition): """Split the dataset according to some partition. Parameters ---------- partition : sequence of integers or floats """ n_data = len(self) p_sizes = [] for i, _partition in enumerate(partition): p_size = int((n_data - sum(p_sizes)) * _partition / sum(partition[i:])) p_sizes.append(p_size) assert sum(p_sizes) == n_data, f"{p_sizes}, {sum(p_sizes)}" ds = [] idx = 0 for p_size in p_sizes: ds.append(self[idx : idx + p_size]) idx += p_size return ds def subsample(self, ratio, seed=None): """Subsample the dataset according to some ratio. Parameters ---------- ratio : float Ratio between the size of the subsampled dataset and the original dataset. """ n_data = len(self) idxs = list(range(n_data)) import random random.seed(seed) _idxs = random.choices(idxs, k=int(n_data * ratio)) return self[_idxs] def save(self, path): """Save dataset to path. Parameters ---------- path : path-like object """ import pickle with open(path, "wb") as f_handle: pickle.dump(self.graphs, f_handle) def regenerate_impropers(self, improper_def="smirnoff"): """ Regenerate the improper nodes for all graphs. 
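This applies `espaloma.graphs.utils.regenerate_impropers.regenerate_impropers` to every graph in the dataset; e.g. (illustrative) `ds.regenerate_impropers("smirnoff")` regenerates impropers under the SMIRNOFF convention.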
Parameters ---------- improper_def : str Which convention to use for permuting impropers. """ from espaloma.graphs.utils.regenerate_impropers import ( regenerate_impropers, ) for g in self.graphs: regenerate_impropers(g, improper_def) @classmethod def load(cls, path): """Load path to dataset. Parameters ---------- """ import pickle with open(path, "rb") as f_handle: graphs = pickle.load(f_handle) return cls(graphs) def __add__(self, x): return self.__class__(self.graphs + x.graphs) class GraphDataset(Dataset): """Dataset with additional support for only viewing certain attributes as `torch.utils.data.DataLoader` Methods ------- view(collate_fn, *args, **kwargs) Provide a `torch.utils.data.DataLoader` view of the dataset. Note """ def __init__(self, graphs=[], first=None): super(GraphDataset, self).__init__() from openff.toolkit.topology import Molecule if all( isinstance(graph, Molecule) or isinstance(graph, str) for graph in graphs ): if first is None or first == -1: graphs = [esp.Graph(graph) for graph in graphs] else: graphs = [esp.Graph(graph) for graph in graphs[:first]] self.graphs = graphs @staticmethod def batch(graphs): import dgl if all(isinstance(graph, esp.graphs.graph.Graph) for graph in graphs): return dgl.batch([graph.heterograph for graph in graphs]) elif all(isinstance(graph, dgl.DGLGraph) for graph in graphs): return dgl.batch(graphs) elif all(isinstance(graph, dgl.DGLHeteroGraph) for graph in graphs): return dgl.batch(graphs) else: raise RuntimeError( "Can only batch DGLGraph or DGLHeterograph," "now have %s" % type(graphs[0]) ) def view(self, collate_fn="graph", *args, **kwargs): """Provide a data loader. Parameters ---------- collate_fn : callable or string see `collate_fn` argument for `torch.utils.data.DataLoader` """ if collate_fn == "graph": collate_fn = self.batch elif collate_fn == "homograph": def collate_fn(graphs): graph = self.batch([g.homograph for g in graphs]) return graph elif collate_fn == "graph-typing": def collate_fn(graphs): graph = self.batch(graphs) y = graph.ndata["legacy_typing"] return graph, y elif collate_fn == "graph-typing-loss": loss_fn = torch.nn.CrossEntropyLoss() def collate_fn(graphs): graph = self.batch(graphs) loss = lambda _graph: loss_fn( _graph.ndata["nn_typing"], graph.ndata["legacy_typing"] ) return graph, loss return torch.utils.data.DataLoader( dataset=self, collate_fn=collate_fn, *args, **kwargs ) def save(self, path): import os os.mkdir(path) for idx, graph in enumerate(self.graphs): graph.save(path + "/" + str(idx)) @classmethod def load(cls, path): import os paths = os.listdir(path) paths = [_path for _path in paths] graphs = [] for _path in paths: graphs.append(esp.Graph.load(path + "/" + _path)) return cls(graphs) ================================================ FILE: espaloma/data/md.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import torch from openmmforcefields.generators import SystemGenerator import openmm from openmm import unit from openmm.app import Simulation from openmm.unit import Quantity from espaloma.units import * import espaloma as esp # ============================================================================= # CONSTANTS # ============================================================================= # simulation specs TEMPERATURE = 350 * unit.kelvin STEP_SIZE = 1.0 * unit.femtosecond COLLISION_RATE = 1.0 / 
unit.picosecond EPSILON_MIN = 0.05 * unit.kilojoules_per_mole # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def add_nonbonded_force( g, forcefield="gaff-1.81", add_charges=True, ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], forcefield_kwargs={"constraints": None, "removeCMMotion": False}, ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # get forces forces = list(system.getForces()) # loop through forces for force in forces: name = force.__class__.__name__ # turn off angle if "Angle" in name: for idx in range(force.getNumAngles()): id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) force.updateParametersInContext(simulation.context) elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) force.setBondParameters( idx, id1, id2, length, 0.0, ) force.updateParametersInContext(simulation.context) elif "Torsion" in name: for idx in range(force.getNumTorsions()): ( id1, id2, id3, id4, periodicity, phase, k, ) = force.getTorsionParameters(idx) force.setTorsionParameters( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) force.updateParametersInContext(simulation.context) elif "Nonbonded" in name: if add_charges == False: for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, q * 1e-8, sigma, epsilon) for idx in range(force.getNumExceptions()): ( idx0, idx1, q, sigma, epsilon, ) = force.getExceptionParameters(idx) force.setExceptionParameters( idx, idx0, idx1, q * 1e-8, sigma, epsilon ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # add the energies g.heterograph.apply_nodes( lambda node: {"u": node.data["u"] + energies}, ntype="g", ) return g def get_coulomb_force( g, forcefield="gaff-1.81", ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], forcefield_kwargs={"constraints": None, "removeCMMotion": False}, ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # 
create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # loop through forces forces = list(system.getForces()) for force in forces: name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, q * 1e-8, sigma, epsilon) for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, q * 1e-8, sigma, epsilon ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots new_energies = [] new_derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 new_energies.append(energy) new_derivatives.append(derivative) # put energies to a tensor new_energies = torch.tensor( new_energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] new_derivatives = torch.tensor( np.stack(new_derivatives, axis=1), dtype=torch.get_default_dtype(), ) return energies - new_energies, derivatives - new_derivatives def subtract_coulomb_force( g, forcefield="gaff-1.81", ): delta_energies, delta_derivatives = get_coulomb_force( g, forcefield=forcefield ) # subtract the energies g.heterograph.apply_nodes( lambda node: {"u_ref": node.data["u_ref"] - delta_energies}, ntype="g", ) if "u_ref_prime" in g.nodes["n1"].data: g.heterograph.apply_nodes( lambda node: { "u_ref_prime": node.data["u_ref_prime"] - delta_derivatives }, ntype="n1", ) return g def subtract_nonbonded_force( g, forcefield="gaff-1.81", subtract_charges=True, ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], forcefield_kwargs={"constraints": None, "removeCMMotion": False}, ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # get forces forces = list(system.getForces()) # loop through forces for force in forces: name = force.__class__.__name__ # turn off angle if "Angle" 
in name: for idx in range(force.getNumAngles()): id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) force.updateParametersInContext(simulation.context) elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) force.setBondParameters( idx, id1, id2, length, 0.0, ) force.updateParametersInContext(simulation.context) elif "Torsion" in name: for idx in range(force.getNumTorsions()): ( id1, id2, id3, id4, periodicity, phase, k, ) = force.getTorsionParameters(idx) force.setTorsionParameters( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) force.updateParametersInContext(simulation.context) elif "Nonbonded" in name: # only handle LJ potentials here; # the Coulomb interaction is subtracted separately with the NoCutoff method if subtract_charges=True for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, q * 1e-8, sigma, epsilon) for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, q * 1e-8, sigma, epsilon ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # subtract the energies g.heterograph.apply_nodes( lambda node: {"u_ref": node.data["u_ref"] - energies}, ntype="g", ) if "u_ref_prime" in g.nodes["n1"].data: g.heterograph.apply_nodes( lambda node: { "u_ref_prime": node.data["u_ref_prime"] - derivatives }, ntype="n1", ) if subtract_charges: g = subtract_coulomb_force(g) return g def subtract_nonbonded_force_except_14( g, forcefield="gaff-1.81", ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # get forces forces = list(system.getForces()) # loop through forces for force in forces: name = force.__class__.__name__ # turn off angle if "Angle" in name: for idx in range(force.getNumAngles()): id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) force.updateParametersInContext(simulation.context) elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) force.setBondParameters( idx, id1, id2, length, 0.0, ) force.updateParametersInContext(simulation.context) elif "Torsion" in name: for idx in range(force.getNumTorsions()): 
( id1, id2, id3, id4, periodicity, phase, k, ) = force.getTorsionParameters(idx) force.setTorsionParameters( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) force.updateParametersInContext(simulation.context) elif "Nonbonded" in name: for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, q, sigma, epsilon * 1e-8 ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # subtract the energies g.heterograph.apply_nodes( lambda node: {"u_ref": node.data["u_ref"] - energies}, ntype="g", ) if "u_ref_prime" in g.nodes["n1"].data: g.heterograph.apply_nodes( lambda node: { "u_ref_prime": node.data["u_ref_prime"] - derivatives }, ntype="n1", ) return g # ============================================================================= # MODULE CLASSES # ============================================================================= class MoleculeVacuumSimulation(object): """Simulate a single molecule system in vacuum. Parameters ---------- forcefield : `str` Name of the small molecule force field used to parameterize the system. n_samples : `int` Number of samples to collect. n_conformers : `int` Number of initial conformers to generate. n_steps_per_sample : `int` Number of steps between each sample. temperature : `float * unit.kelvin` Temperature for the simulation. collision_rate : `float / unit.picosecond` Collision rate. step_size : `float * unit.femtosecond` Time step. charge_method : `str` or `None` Partial charge method passed to `Molecule.assign_partial_charges`; if `None`, no charges are assigned here. Methods ------- simulation_from_graph : Create simulation from molecule. run : Run the simulation. 
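Examples
--------
A minimal sketch, mirroring the pattern in
``espaloma/data/tests/test_md.py`` (assumes the default ``gaff-1.81``
force field is available):

>>> import espaloma as esp
>>> g = esp.Graph("c1ccccc1")
>>> simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10)
>>> samples = simulation.run(g, in_place=False)
>>> tuple(samples.shape)
(10, 12, 3)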
""" def __init__( self, forcefield="gaff-1.81", n_samples=100, n_conformers=10, n_steps_per_sample=1000, temperature=TEMPERATURE, collision_rate=COLLISION_RATE, step_size=STEP_SIZE, charge_method=None, ): self.n_samples = n_samples self.n_steps_per_sample = n_steps_per_sample self.temperature = temperature self.collision_rate = collision_rate self.step_size = step_size self.forcefield = forcefield self.n_conformers = n_conformers self.charge_method = charge_method def simulation_from_graph(self, g): """Create simulation from moleucle""" # assign partial charge if self.charge_method is not None: g.mol.assign_partial_charges(self.charge_method) # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=self.forcefield, molecules=[g.mol], ) # create openmm system system = generator.create_system( topology, ) # set epsilon minimum to 0.05 kJ/mol for force in system.getForces(): if "Nonbonded" in force.__class__.__name__: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) for particle_index in range(force.getNumParticles()): charge, sigma, epsilon = force.getParticleParameters( particle_index ) if epsilon < EPSILON_MIN: force.setParticleParameters( particle_index, charge, sigma, EPSILON_MIN ) # use langevin integrator integrator = openmm.LangevinIntegrator( self.temperature, self.collision_rate, self.step_size ) # initialize simulation simulation = Simulation( topology=topology, system=system, integrator=integrator, platform=openmm.Platform.getPlatformByName("Reference"), ) return simulation def run(self, g, in_place=True): """Collect samples from simulation. Parameters ---------- g : `esp.Graph` Input graph. in_place : `bool` If ture, Returns ------- samples : `torch.Tensor`, `shape=(n_samples, n_nodes, 3)` `in_place=True` Sample. graph : `esp.Graph` Modified graph. 
""" # build simulation simulation = self.simulation_from_graph(g) import openff.toolkit # get conformer g.mol.generate_conformers( toolkit_registry=openff.toolkit.utils.RDKitToolkitWrapper(), n_conformers=self.n_conformers, ) # get number of actual conformers true_n_conformers = len(g.mol.conformers) samples = [] for idx in range(true_n_conformers): # put conformer in simulation simulation.context.setPositions(g.mol.conformers[idx].to_openmm()) # set velocities simulation.context.setVelocitiesToTemperature(self.temperature) # minimize simulation.minimizeEnergy() # loop through number of samples for _ in range(self.n_samples // self.n_conformers): # run MD for `self.n_steps_per_sample` steps simulation.step(self.n_steps_per_sample) # append samples to `samples` samples.append( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(DISTANCE_UNIT) ) # if the `samples` array is not filled, # pick a random conformer to do it again if len(samples) < self.n_samples: len_samples = len(samples) import random idx = random.choice(list(range(true_n_conformers))) simulation.context.setPositions(g.mol.conformers[idx].to_openmm()) # set velocities simulation.context.setVelocitiesToTemperature(self.temperature) # minimize simulation.minimizeEnergy() # loop through number of samples for _ in range(self.n_samples - len_samples): # run MD for `self.n_steps_per_sample` steps simulation.step(self.n_steps_per_sample) # append samples to `samples` samples.append( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(DISTANCE_UNIT) ) assert len(samples) == self.n_samples # put samples into an array samples = np.array(samples) # put samples into tensor samples = torch.tensor(samples, dtype=torch.float32) if in_place is True: g.heterograph.nodes["n1"].data["xyz"] = samples.permute(1, 0, 2) # require gradient for force matching g.heterograph.nodes["n1"].data["xyz"].requires_grad = True return g return samples ================================================ FILE: espaloma/data/md17_utils.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import torch import espaloma as esp from openmm import unit from openmm.unit import Quantity # ============================================================================= # CONSTANTS # ============================================================================= MOLECULES = { "benzene": "C1=CC=CC=C1", "uracil": "O=C1NC=CC(=O)N1", "naphthalene": "C1=CC=C2C=CC=CC2=C1", "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O", "salicylic": "C1=CC=C(C(=C1)C(=O)O)O", "malonaldehyde": "C(C=O)C=O", "ethanol": "CCO", "toluene": "CC1=CC=CC=C1", "paracetamol": "CC(=O)NC1=CC=C(C=C1)O", "azobenzene": "C1=CC=C(C=C1)N=NC2=CC=CC=C2", } OFFSETS = { 1: -0.500607632585, 6: -37.8302333826, 7: -54.5680045287, 8: -75.0362229210, } # ============================================================================== # UTILITY FUNCTIONS # ============================================================================== def sum_offsets(elements): return sum([OFFSETS[element] for element in elements]) def realize_molecule( data, name, smiles=None, first=-1, subtract_nonbonded=True ): elements = data["z"].tolist() offset = sum_offsets(elements) g = esp.data.utils.infer_mol_from_coordinates( data["R"][0], elements, smiles, ) g.nodes["n1"].data["xyz"] = torch.tensor( Quantity( 
data["R"].transpose(1, 0, 2), unit.angstrom, ).value_in_unit(esp.units.DISTANCE_UNIT), requires_grad=True, )[:, :first, :] g.nodes["g"].data["u_ref"] = ( torch.tensor( Quantity( data["E"], unit.kilocalorie_per_mole, ).value_in_unit(esp.units.ENERGY_UNIT) ).transpose(1, 0)[:, :first] - offset ) g.nodes["n1"].data["u_ref_prime"] = torch.tensor( Quantity( data["F"], unit.kilocalorie_per_mole / unit.angstrom, ).value_in_unit(esp.units.FORCE_UNIT) ).transpose(1, 0)[:, :first, :] if subtract_nonbonded is True: g = esp.data.md.subtract_nonbonded_force(g) return g def get_molecule(name, *args, **kwargs): if name == "benzene": file_name = "benzene_old_dft.npz" else: file_name = "%s_dft.npz" % name from os.path import exists if not exists(file_name): url = "http://www.quantum-machine.org/gdml/data/npz/%s" % file_name print(url) import urllib.request urllib.request.urlretrieve(url, file_name) data = np.load(file_name) smiles = MOLECULES[name] g = realize_molecule(data, name, smiles, *args, **kwargs) return g ================================================ FILE: espaloma/data/normalize.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import torch import espaloma as esp # ============================================================================= # BASE CLASSES # ============================================================================= class BaseNormalize(abc.ABC): """Base class for normalizing operation.""" def __init__(self): super(BaseNormalize, self).__init__() @abc.abstractmethod def _prepare(self): # NOTE: # `_norm` and `_unnorm` are assigned here raise NotImplementedError # ============================================================================= # MODULE CLASSES # ============================================================================= class DatasetNormalNormalize(BaseNormalize): """Normalizing operation based on a dataset of molecules, assuming parameters having normal distribution. Parameters ---------- dataset : `espaloma.data.dataset.Dataset` The dataset we base on to calculate the statistics of parameter distributions. 
Attributes ---------- norm : normalize function unnorm : unnormalize function """ def __init__(self, dataset): super(DatasetNormalNormalize, self).__init__() self.dataset = dataset self._prepare() def _prepare(self): """Calculate the statistics from dataset""" # grab the collection of graphs in the dataset, batched g = self.dataset.batch(self.dataset.graphs) self.statistics = {term: {} for term in ["n1", "n2", "n3", "n4"]} # calculate statistics for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[term].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue self.statistics[term][ key.replace("_ref", "_mean") ] = torch.mean(g.nodes[term].data[key], axis=0) self.statistics[term][ key.replace("_ref", "_std") ] = torch.std(g.nodes[term].data[key], axis=0) # get normalize and unnormalize functions def norm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue g.nodes[term].data[key] = ( g.nodes[term].data[key] - self.statistics[term][key.replace("_ref", "_mean")] ) / self.statistics[term][key.replace("_ref", "_std")] return g def unnorm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if key + "_mean" in self.statistics[term]: g.nodes[term].data[key] = ( g.nodes[term].data[key] * self.statistics[term][key + "_std"] + self.statistics[term][key + "_mean"] ) # # elif '_ref' in key \ # and key.replace('_ref', '_mean')\ # in self.statistics[term]: # # g.nodes[term].data[key]\ # = g.nodes[term].data[key]\ # * self.statistics[term][ # key.replace('_ref', '_std')]\ # + self.statistics[term][ # key.replace('_ref', '_mean')] return g # point normalize and unnormalize functions to `self` self.norm = norm self.unnorm = unnorm class DatasetLogNormalNormalize(BaseNormalize): """Normalizing operation based on a dataset of molecules, assuming parameters having log normal distribution. Parameters ---------- dataset : `espaloma.data.dataset.Dataset` The dataset we base on to calculate the statistics of parameter distributions. 
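Statistics are computed on the logarithm of each parameter, so all parameters are assumed to be positive.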
Attributes ---------- norm : normalize function unnorm : unnormalize function """ def __init__(self, dataset): super(DatasetLogNormalNormalize, self).__init__() self.dataset = dataset self._prepare() def _prepare(self): """Calculate the statistics from dataset""" # grab the collection of graphs in the dataset, batched g = self.dataset.batch(self.dataset.graphs) self.statistics = {term: {} for term in ["n1", "n2", "n3", "n4"]} # calculate statistics for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[term].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue self.statistics[term][ key.replace("_ref", "_mean") ] = torch.mean(g.nodes[term].data[key].log(), axis=0) self.statistics[term][ key.replace("_ref", "_std") ] = torch.std(g.nodes[term].data[key].log(), axis=0) # get normalize and unnormalize functions def norm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue g.nodes[term].data[key] = ( g.nodes[term].data[key].log() - self.statistics[term][key.replace("_ref", "_mean")] ) / self.statistics[term][key.replace("_ref", "_std")] return g def unnorm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if key + "_mean" in self.statistics[term]: g.nodes[term].data[key] = torch.exp( g.nodes[term].data[key] * self.statistics[term][key + "_std"].to( g.nodes[term].data[key].device ) + self.statistics[term][key + "_mean"].to( g.nodes[term].data[key].device ) ) # # elif '_ref' in key \ # and key.replace('_ref', '_mean')\ # in self.statistics[term]: # # g.nodes[term].data[key]\ # = torch.exp( # g.nodes[term].data[key]\ # * self.statistics[term][ # key.replace('_ref', '_std')]\ # + self.statistics[term][ # key.replace('_ref', '_mean')]) return g # point normalize and unnormalize functions to `self` self.norm = norm self.unnorm = unnorm # ============================================================================= # PRESETS # ============================================================================= class ESOL100NormalNormalize(DatasetNormalNormalize): def __init__(self): super(ESOL100NormalNormalize, self).__init__( dataset=esp.data.esol(first=100).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) class ESOL100LogNormalNormalize(DatasetLogNormalNormalize): def __init__(self): super(ESOL100LogNormalNormalize, self).__init__( dataset=esp.data.esol(first=100).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) class NotNormalize(BaseNormalize): def __init__(self): super(NotNormalize, self).__init__() self._prepare() def _prepare(self): self.norm = lambda x: x self.unnorm = lambda x: x class PositiveNotNormalize(BaseNormalize): def __init__(self): super(PositiveNotNormalize, self).__init__() self._prepare() def _prepare(self): # get normalize and unnormalize functions def norm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue g.nodes[term].data[key] = g.nodes[term].data[key].log() return g def unnorm(g): for term in [ "n2", "n3", ]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if key == "k" or key == "eq": g.nodes[term].data[key] 
= torch.exp( g.nodes[term].data[key] ) return g # point normalize and unnormalize functions to `self` self.norm = norm self.unnorm = unnorm ================================================ FILE: espaloma/data/off-mol_0_10_6.json ================================================ "{\"name\": \"\", \"atoms\": [{\"atomic_number\": 8, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 16, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 17, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": 
null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}], \"virtual_sites\": [], \"bonds\": [{\"atom1\": 0, \"atom2\": 1, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 1, \"atom2\": 2, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 2, \"atom2\": 3, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 3, \"atom2\": 4, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 4, \"atom2\": 5, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 5, \"atom2\": 6, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 6, \"atom2\": 7, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 7, \"atom2\": 8, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 8, \"atom2\": 9, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 9, \"atom2\": 10, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 10, \"atom2\": 11, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 1, \"atom2\": 12, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 12, \"atom2\": 13, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 13, \"atom2\": 14, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 14, \"atom2\": 15, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 15, \"atom2\": 16, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, 
{\"atom1\": 16, \"atom2\": 17, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 17, \"atom2\": 18, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 12, \"atom2\": 19, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 19, \"atom2\": 20, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 20, \"atom2\": 21, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 21, \"atom2\": 22, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 22, \"atom2\": 23, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 22, \"atom2\": 24, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 24, \"atom2\": 25, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 11, \"atom2\": 3, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 18, \"atom2\": 14, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 25, \"atom2\": 19, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 11, \"atom2\": 6, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 2, \"atom2\": 26, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 2, \"atom2\": 27, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 7, \"atom2\": 28, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 8, \"atom2\": 29, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 9, \"atom2\": 30, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 10, \"atom2\": 31, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 13, \"atom2\": 32, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 13, \"atom2\": 33, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 15, \"atom2\": 34, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 16, \"atom2\": 35, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 18, \"atom2\": 36, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 20, \"atom2\": 37, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 21, \"atom2\": 38, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 24, \"atom2\": 39, \"bond_order\": 1, 
\"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 25, \"atom2\": 40, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}], \"properties\": {}, \"conformers\": null, \"partial_charges\": null, \"partial_charges_unit\": null}" ================================================ FILE: espaloma/data/qcarchive_utils.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= from collections import namedtuple from typing import Tuple import numpy as np import qcportal import torch from openmm import unit from openmm.unit import Quantity import espaloma as esp # ============================================================================= # CONSTANTS # ============================================================================= # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def get_client(url: str = "api.qcarchive.molssi.org") -> qcportal.client.PortalClient: """ Returns a instance of the qcportal client. Parameters ---------- url: str, default="api.qcarchive.molssi.org" qcportal instance to connect Returns ------- qcportal.client.PortalClient qcportal client instance. """ # Note, this may need to be modified to include username/password for non-public servers return qcportal.PortalClient(url) def get_collection( client, collection_type="optimization", name="OpenFF Full Optimization Benchmark 1", ): """ Connects to a specific dataset on qcportal Parameters ---------- client: qcportal.client, required The qcportal client instance collection_type: str, default="optimization" The type of qcarchive collection, options are "torsiondrive", "optimization", "gridoptimization", "reaction", "singlepoint" "manybody" name: str, default="OpenFF Full Optimization Benchmark 1" Name of the dataset Returns ------- (qcportal dataset, list(str)) Tuple with an instance of qcportal dataset and list of record names """ collection = client.get_dataset( dataset_type=collection_type, dataset_name=name, ) record_names = collection.entry_names return collection, record_names def process_record(record, entry): """ Processes a given record/entry pair from a dataset and returns the graph Parameters ---------- record: qcportal.optimization.record_models.OptimizationRecord qcportal record entry: cportal.optimization.dataset_models.OptimizationDatasetEntry qcportal entry Returns ------- esp.Graph """ from openff.toolkit.topology import Molecule if record.record_type == "optimization": trajectory = record.trajectory if trajectory is None: return None else: raise Exception( f"{record.record_type} is not supported: only optimization datasets can be processed." 
) mol = Molecule.from_qcschema(entry.dict()) g = esp.Graph(mol) # energy is already hartree g.nodes["g"].data["u_ref"] = torch.tensor( [ Quantity( snapshot.properties["scf_total_energy"], esp.units.HARTREE_PER_PARTICLE, ).value_in_unit(esp.units.ENERGY_UNIT) for snapshot in trajectory ], dtype=torch.get_default_dtype(), )[None, :] g.nodes["n1"].data["xyz"] = torch.tensor( np.stack( [ Quantity( snapshot.molecule.geometry, unit.bohr, ).value_in_unit(esp.units.DISTANCE_UNIT) for snapshot in trajectory ], axis=1, ), requires_grad=True, dtype=torch.get_default_dtype(), ) g.nodes["n1"].data["u_ref_prime"] = torch.stack( [ torch.tensor( Quantity( np.array(snapshot.properties["return_result"]).reshape((-1, 3)), esp.units.HARTREE_PER_PARTICLE / unit.bohr, ).value_in_unit(esp.units.FORCE_UNIT), dtype=torch.get_default_dtype(), ) for snapshot in trajectory ], dim=1, ) return g def get_graph(collection, record_name, spec_name="default"): """ Processes the qcportal data for a given record name. Only optimization datasets are supported. Parameters ---------- collection, qcportal dataset, required The instance of the qcportal dataset record_name, str, required The name of a given record spec_name, str, default="default" Retrieve data for a given qcportal specification. Returns ------- Graph """ # get record and trajectory record = collection.get_record(record_name, specification_name=spec_name) entry = collection.get_entry(record_name) g = process_record(record, entry) return g def get_graphs(collection, record_names, spec_name="default"): """ Processes the qcportal data for a given set of record names. This uses the qcportal iteration functions which are faster than processing records one at a time. Only optimization datasets are supported. Parameters ---------- collection, qcportal dataset, required The instance of the qcportal dataset record_names, List[str], required A list of record names to process spec_name, str, default="default" Retrieve data for a given qcportal specification. Returns ------- list(graph) Returns a list of the corresponding graphs for each record name """ g_list = [] for record, entry in zip( collection.iterate_records(record_names, specification_names=[spec_name]), collection.iterate_entries(record_names), ): # note iterate records returns a tuple of length 3 (name, spec_name, actual record information) g = process_record(record[2], entry) g_list.append(g) return g_list def fetch_td_record(record: qcportal.torsiondrive.record_models.TorsiondriveRecord): """ Fetches configuration, energy, and gradients for a given torsiondrive record as a function of different angles. Parameters ---------- record: qcportal.torsiondrive.record_models.TorsiondriveRecord, required Torsiondrive record of interest Returns ------- tuple, (numpy.array, numpy.array, numpy.array, numpy.array) Returned data is a tuple of numpy arrays. The first array contains the angles; the subsequent arrays contain the molecular coordinates, energies, and gradients associated with each angle. """ molecule_optimization = record.optimizations angle_keys = list(molecule_optimization.keys()) xyzs = [] energies = [] gradients = [] for angle in angle_keys: # NOTE: this is calling the first index of the optimization array # this gives the same value as the prior implementation. # however it seems that this contains multiple different initial configurations # that have been optimized. Should all conformers and energies/gradients be considered? 
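# for now, keep only the first optimization for each angle and read the final (optimized) frame of its trajectory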
mol = molecule_optimization[angle][0].final_molecule result = molecule_optimization[angle][0].trajectory[-1].properties """Note: force = - gradient""" # TODO: attach units here? or later? e = result["current energy"] g = np.array(result["current gradient"]).reshape(-1, 3) xyzs.append(mol.geometry) energies.append(e) gradients.append(g) # to arrays xyz = np.array(xyzs) energies = np.array(energies) gradients = np.array(gradients) # assume each angle key is a tuple -- sort by first angle in tuple # NOTE: (for now making the assumption that these torsion drives are 1D) for k in angle_keys: assert len(k) == 1 to_ordered = np.argsort([k[0] for k in angle_keys]) angles_in_order = [angle_keys[i_] for i_ in to_ordered] flat_angles = np.array(angles_in_order).flatten() # put the xyz's, energies, and gradients in the same order as the angles xyz_in_order = xyz[to_ordered] energies_in_order = energies[to_ordered] gradients_in_order = gradients[to_ordered] # TODO: put this return blob into a better struct return flat_angles, xyz_in_order, energies_in_order, gradients_in_order MolWithTargets = namedtuple( "MolWithTargets", ["offmol", "xyz", "energies", "gradients"] ) def h5_to_dataset(df): def get_smiles(x): try: return x["offmol"].to_smiles() except: return np.nan df["smiles"] = df.apply(get_smiles, axis=1) df = df.dropna() groups = df.groupby("smiles") gs = [] for name, group in groups: mol_ref = group["offmol"][0] assert all(mol_ref == entry for entry in group["offmol"]) g = esp.Graph(mol_ref) u_ref = np.concatenate(group["energies"].values) u_ref_prime = np.concatenate(group["gradients"].values, axis=0).transpose( 1, 0, 2 ) xyz = np.concatenate(group["xyz"].values, axis=0).transpose(1, 0, 2) assert u_ref_prime.shape[0] == xyz.shape[0] == mol_ref.n_atoms assert u_ref.shape[0] == u_ref_prime.shape[1] == xyz.shape[1] # energy is already hartree g.nodes["g"].data["u_ref"] = torch.tensor( Quantity(u_ref, esp.units.HARTREE_PER_PARTICLE).value_in_unit( esp.units.ENERGY_UNIT ), dtype=torch.get_default_dtype(), )[None, :] g.nodes["n1"].data["xyz"] = torch.tensor( Quantity( xyz, unit.bohr, ).value_in_unit(esp.units.DISTANCE_UNIT), requires_grad=True, dtype=torch.get_default_dtype(), ) g.nodes["n1"].data["u_ref_prime"] = torch.tensor( Quantity( u_ref_prime, esp.units.HARTREE_PER_PARTICLE / unit.bohr, ).value_in_unit(esp.units.FORCE_UNIT), dtype=torch.get_default_dtype(), ) gs.append(g) return esp.data.dataset.GraphDataset(gs) def breakdown_along_time_axis(g, batch_size=32): n_snapshots = g.nodes["g"].data["u_ref"].flatten().shape[0] idxs = list(range(n_snapshots)) from random import shuffle shuffle(idxs) chunks = [ idxs[_idx * batch_size: (_idx + 1) * batch_size] for _idx in range(n_snapshots // batch_size) ] _gs = [] for chunk in chunks: _g = esp.Graph(g.mol) _g.nodes["g"].data["u_ref"] = ( g.nodes["g"].data["u_ref"][:, chunk].detach().clone() ) _g.nodes["n1"].data["xyz"] = ( g.nodes["n1"].data["xyz"][:, chunk, :].detach().clone() ) _g.nodes["n1"].data["u_ref_prime"] = ( g.nodes["n1"].data["u_ref_prime"][:, chunk, :].detach().clone() ) _g.nodes["n1"].data["xyz"].requires_grad = True _gs.append(_g) return _gs def make_batch_size_consistent(ds, batch_size=32): import itertools return esp.data.dataset.GraphDataset( list( itertools.chain.from_iterable( [breakdown_along_time_axis(g, batch_size=batch_size) for g in ds] ) ) ) def weight_by_snapshots(g, key="weight"): n_snapshots = g.nodes["n1"].data["xyz"].shape[1] g.nodes["g"].data[key] = torch.tensor(float(1.0 / n_snapshots))[None, :] 
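# =============================================================================
# EXAMPLE
# =============================================================================
# A minimal usage sketch (mirrors espaloma/data/tests/test_qcarchive.py;
# requires network access to the public QCArchive server, and the record
# index below is only illustrative):
#
#     from espaloma.data import qcarchive_utils
#
#     client = qcarchive_utils.get_client()
#     collection, record_names = qcarchive_utils.get_collection(client)
#     g = qcarchive_utils.get_graph(collection, record_names[0])
#     print(g.nodes["g"].data["u_ref"].shape)  # (1, n_snapshots)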
================================================ FILE: espaloma/data/tests/test_collection.py ================================================ import pytest @pytest.fixture def esol(): import espaloma as esp return esp.data.esol(first=16) def test_view(esol): view = esol.view(batch_size=4) import dgl graphs = list(view) assert len(graphs) == 4 assert all(isinstance(graph, dgl.DGLHeteroGraph) for graph in graphs) def test_typing(esol): import espaloma as esp typing = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") esol = esol.apply(typing, in_place=True) view = esol.view(batch_size=4) for g in view: assert g.nodes["n1"].data["legacy_typing"].shape[ 0 ] == g.number_of_nodes(ntype="n1") ================================================ FILE: espaloma/data/tests/test_dataset.py ================================================ import pytest def test_tiny_dataset(): import espaloma as esp xs = list(range(5)) ds = esp.data.dataset.Dataset(xs) @pytest.fixture def ds(): xs = list(range(5)) import espaloma as esp return esp.data.dataset.Dataset(xs) def test_get(ds): assert ds[0] == 0 def test_len(ds): assert len(ds) == 5 def test_iter(ds): assert all(x == x_ for (x, x_) in zip(ds, range(5))) def test_slice(ds): import espaloma as esp sub_ds = ds[:2] assert isinstance(ds, esp.data.dataset.Dataset) assert len(sub_ds) == 2 def test_split(ds): a, b = ds.split([1, 4]) assert len(a) == 1 assert len(b) == 4 @pytest.fixture def ds_new(ds): fn = lambda x: x + 1 return ds.apply(fn) def test_no_change(ds_new): assert all(x == x_ for (x, x_) in zip(ds_new.graphs, range(5))) def test_get_new(ds_new): assert ds_new[0] == 1 def test_len_new(ds_new): assert len(ds_new) == 5 def test_iter_new(ds_new): assert all(x == x_ + 1 for (x, x_) in zip(ds_new, range(5))) @pytest.fixture def ds_newer(ds): fn = lambda x: x + 1 return ds.apply(fn).apply(fn) def test_iter_newer(ds_newer): assert all(x == x_ + 2 for (x, x_) in zip(ds_newer, range(5))) def test_no_return(ds): fn = lambda x: x + 1 ds.apply(fn).apply(fn) assert all(x == x_ + 2 for (x, x_) in zip(ds, range(5))) def test_subsample(ds): _ds = ds.subsample(0.2) assert len(_ds) == 1 ================================================ FILE: espaloma/data/tests/test_md.py ================================================ import pytest import torch def test_init(): import espaloma.data.md @pytest.fixture def graph(): import espaloma as esp graph = esp.Graph("c1ccccc1") return graph @pytest.fixture def ds(): import espaloma as esp ds = esp.data.esol(first=10) return ds def test_system(graph): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation() def test_run(graph): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) samples = simulation.run(graph, in_place=False) assert samples.shape == torch.Size([10, 12, 3]) def test_run_in_place(graph): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) graph = simulation.run(graph, in_place=True) assert graph.nodes["n1"].data["xyz"].shape == torch.Size([12, 10, 3]) def test_apply(ds): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=1 ).run ds.apply(simulation, in_place=True) assert ds.graphs[0].nodes["n1"].data["xyz"].shape[-1] == 3 assert ds.graphs[0].nodes["n1"].data["xyz"].shape[-2] == 1 ================================================ FILE: 
espaloma/data/tests/test_normalize.py ================================================ import numpy.testing as npt import pytest def test_import(): from espaloma.data.normalize import BaseNormalize def test_normalize_esol(): import espaloma as esp normalize = esp.data.normalize.DatasetNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) def test_log_normalize_esol(): import espaloma as esp normalize = esp.data.normalize.DatasetLogNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) def test_normal_normalize_reproduce(): import espaloma as esp normalize = esp.data.normalize.DatasetNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) esol = esp.data.esol(first=1) # do some typing param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize esol.apply(param, in_place=True) # this modify the original data g = esol[0] import copy g_ = copy.deepcopy(g) g = normalize.norm(g) g.nodes["n2"].data["k"] = g.nodes["n2"].data["k_ref"] g.nodes["n2"].data["eq"] = g.nodes["n2"].data["eq_ref"] g = normalize.unnorm(g) npt.assert_almost_equal( g.nodes["n2"].data["k"].detach().numpy(), g_.nodes["n2"].data["k_ref"].detach().numpy(), ) npt.assert_almost_equal( g.nodes["n2"].data["eq"].detach().numpy(), g_.nodes["n2"].data["eq_ref"].detach().numpy(), ) def test_log_normal_normalize_reproduce(): import espaloma as esp normalize = esp.data.normalize.DatasetLogNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) esol = esp.data.esol(first=1) # do some typing param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize esol.apply(param, in_place=True) # this modify the original data g = esol[0] import copy g_ = copy.deepcopy(g) g = normalize.norm(g) g.nodes["n2"].data["k"] = g.nodes["n2"].data["k_ref"] g.nodes["n2"].data["eq"] = g.nodes["n2"].data["eq_ref"] g = normalize.unnorm(g) npt.assert_almost_equal( g.nodes["n2"].data["k"].detach().numpy(), g_.nodes["n2"].data["k_ref"].detach().numpy(), decimal=1, ) npt.assert_almost_equal( g.nodes["n2"].data["eq"].detach().numpy(), g_.nodes["n2"].data["eq_ref"].detach().numpy(), decimal=1, ) ================================================ FILE: espaloma/data/tests/test_qcarchive.py ================================================ import pytest def test_import(): import espaloma.data.qcarchive_utils def test_get_graph(): from espaloma.data import qcarchive_utils client = qcarchive_utils.get_client() collection, record_names = qcarchive_utils.get_collection(client) # The order records are received is not guaranteed, and can change if, # e.g., the underlying database ends up being replaced by a copy during a database migration. # as such we need to use a specific record name. 
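# (the record names below are entries of the default "OpenFF Full Optimization Benchmark 1" dataset requested above)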
records_names_for_testing = ['c1c2c(c(c(c1f)n3cc(c3)o)cl)n(cc(c2=o)c(=o)[o-])c4c(cc(c(n4)n)f)f-3', 'c1c2c(cc(c1f)n3ccncc3)n(cc(c2=o)c(=o)[o-])c4cc4-0'] record_name = records_names_for_testing[0] assert record_name in record_names graph = qcarchive_utils.get_graph(collection, record_name) assert graph is not None graphs = qcarchive_utils.get_graphs(collection, records_names_for_testing) assert len(graphs) == 2 assert graphs[0] is not None def test_notsupported_dataset(): from espaloma.data import qcarchive_utils name = "DBH24" collection_type = "reaction" collection, record_names = qcarchive_utils.get_collection( qcarchive_utils.get_client("ml.qcarchive.molssi.org"), collection_type, name ) record_name = record_names[0] with pytest.raises(Exception): graph = qcarchive_utils.get_graph(collection, record_name, spec_name="spec_2") def test_get_torsiondrive(): from espaloma.data import qcarchive_utils import numpy as np record_name = "[h]c1c(c(c(c([c:1]1[n:2]([c:3](=[o:4])c(=c([h])[h])[h])c([h])([h])[h])[h])[h])n(=o)=o)[h]" # example dataset name = "OpenFF Amide Torsion Set v1.0" collection_type = "torsiondrive" collection, record_names = qcarchive_utils.get_collection( qcarchive_utils.get_client(), collection_type, name ) record_info = collection.get_record(record_name, specification_name="default") ( flat_angles, xyz_in_order, energies_in_order, gradients_in_order, ) = qcarchive_utils.fetch_td_record(record_info) assert flat_angles.shape == (24,) assert energies_in_order.shape == (24,) assert gradients_in_order.shape == (24, 25, 3) assert xyz_in_order.shape == (24, 25, 3) assert np.isclose(energies_in_order[0], -722.2850260791969) assert np.all( flat_angles == np.array( [ -165, -150, -135, -120, -105, -90, -75, -60, -45, -30, -15, 0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, ] ) ) assert np.allclose( xyz_in_order[0][0], np.array([-0.66407807, -8.59922225, -0.02685972]) ) ================================================ FILE: espaloma/data/tests/test_save_and_load.py ================================================ import pytest def test_save_and_load(): import espaloma as esp g = esp.Graph("C") ds = esp.data.dataset.GraphDataset([g]) # Temporary directory will be automatically cleaned up from espaloma.data.utils import make_temp_directory with make_temp_directory() as tmpdir: import os filename = os.path.join(tmpdir, "ds") ds.save(filename) new_ds = esp.data.dataset.GraphDataset.load(filename) ================================================ FILE: espaloma/data/utils.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import random import numpy as np import pandas as pd import torch import contextlib import espaloma as esp OFFSETS = { 1: -0.500607632585, 6: -37.8302333826, 7: -54.5680045287, 8: -75.0362229210, } # ============================================================================== # UTILITY FUNCTIONS # ============================================================================== @contextlib.contextmanager def make_temp_directory(): import tempfile, shutil temp_dir = tempfile.mkdtemp() try: yield temp_dir finally: shutil.rmtree(temp_dir) def sum_offsets(elements): return sum([OFFSETS[element] for element in elements]) def from_csv(path, toolkit="rdkit", smiles_col=-1, y_cols=[-2], seed=2666): """Read csv from file.""" def _from_csv(): df = pd.read_csv(path) df_smiles = df.iloc[:, smiles_col] df_y = 
df.iloc[:, y_cols] if toolkit == "rdkit": from rdkit import Chem mols = [Chem.MolFromSmiles(smiles) for smiles in df_smiles] gs = [esp.HomogeneousGraph(mol) for mol in mols] elif toolkit == "openeye": from openeye import oechem mols = [ oechem.OESmilesToMol(oechem.OEGraphMol(), smiles) for smiles in df_smiles ] gs = [esp.HomogeneousGraph(mol) for mol in mols] ds = list(zip(gs, list(torch.tensor(df_y.values)))) random.seed(seed) random.shuffle(ds) return ds return _from_csv def normalize(ds): """Get mean and std.""" gs, ys = tuple(zip(*ds)) y_mean = np.mean(ys) y_std = np.std(ys) def norm(y): return (y - y_mean) / y_std def unnorm(y): return y * y_std + y_mean return y_mean, y_std, norm, unnorm def split(ds, partition): """Split the dataset according to some partition.""" n_data = len(ds) # get the actual size of partition partition = [int(n_data * x / sum(partition)) for x in partition] ds_batched = [] idx = 0 for p_size in partition: ds_batched.append(ds[idx : idx + p_size]) idx += p_size return ds_batched def batch(ds, batch_size, seed=2666): """Batch graphs and values after shuffling.""" import dgl # get the number of data n_data_points = len(ds) n_batches = n_data_points // batch_size # drop the rest random.seed(seed) random.shuffle(ds) gs, ys = tuple(zip(*ds)) gs_batched = [ dgl.batch(gs[idx * batch_size : (idx + 1) * batch_size]) for idx in range(n_batches) ] ys_batched = [ torch.stack(ys[idx * batch_size : (idx + 1) * batch_size], dim=0) for idx in range(n_batches) ] return list(zip(gs_batched, ys_batched)) def collate_fn(graphs): import dgl return esp.HomogeneousGraph(dgl.batch(graphs)) def infer_mol_from_coordinates( coordinates, species, smiles_ref=None, coordinates_unit="angstrom", ): # local import from openeye import oechem from openmm import unit from openmm.unit import Quantity if isinstance(coordinates_unit, str): coordinates_unit = getattr(unit, coordinates_unit) # make sure we have the coordinates # in the unit system coordinates = Quantity(coordinates, coordinates_unit).value_in_unit( unit.angstrom # to make openeye happy ) # initialize molecule mol = oechem.OEGraphMol() if all(isinstance(symbol, str) for symbol in species): [ mol.NewAtom(getattr(oechem, "OEElemNo_" + symbol)) for symbol in species ] elif all(isinstance(symbol, int) for symbol in species): [ mol.NewAtom( getattr( oechem, "OEElemNo_" + oechem.OEGetAtomicSymbol(symbol) ) ) for symbol in species ] else: raise RuntimeError( "The species can only be all strings or all integers." ) mol.SetCoords(coordinates.reshape([-1])) mol.SetDimension(3) oechem.OEDetermineConnectivity(mol) oechem.OEFindRingAtomsAndBonds(mol) oechem.OEPerceiveBondOrders(mol) if smiles_ref is not None: smiles_can = oechem.OECreateCanSmiString(mol) ims = oechem.oemolistream() ims.SetFormat(oechem.OEFormat_SMI) ims.openstring(smiles_ref) mol_ref = next(ims.GetOEMols()) smiles_ref = oechem.OECreateCanSmiString(mol_ref) assert ( smiles_ref == smiles_can ), "SMILES different. Input is %s, ref is %s" % ( smiles_can, smiles_ref, ) from openff.toolkit.topology import Molecule _mol = Molecule.from_openeye(mol, allow_undefined_stereo=True) g = esp.Graph(_mol) return g ================================================ FILE: espaloma/graphs/__init__.py ================================================ """The basic data structure of espaloma---`Graph`---represents a molecular system and provides access to `dgl.DGLHeteroGraph` and `openff.toolkit.topology.Molecule`. """ from . 
import deploy, utils from .legacy_force_field import * ================================================ FILE: espaloma/graphs/deploy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import rdkit import torch from openff.toolkit.typing.engines.smirnoff import ForceField import espaloma as esp from openmm import unit from openmm.unit import Quantity import math # ============================================================================= # CONSTANTS # ============================================================================= OPENMM_LENGTH_UNIT = unit.nanometer OPENMM_ANGLE_UNIT = unit.radian OPENMM_ENERGY_UNIT = unit.kilojoule_per_mole OPENMM_BOND_EQ_UNIT = OPENMM_LENGTH_UNIT OPENMM_ANGLE_EQ_UNIT = OPENMM_ANGLE_UNIT OPENMM_TORSION_K_UNIT = OPENMM_ENERGY_UNIT OPENMM_TORSION_PHASE_UNIT = OPENMM_ANGLE_UNIT OPENMM_BOND_K_UNIT = OPENMM_ENERGY_UNIT / (OPENMM_LENGTH_UNIT**2) OPENMM_ANGLE_K_UNIT = OPENMM_ENERGY_UNIT / (OPENMM_ANGLE_UNIT**2) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def load_forcefield(forcefield="openff_unconstrained-2.2.1"): # get a forcefield try: ff = ForceField("%s.offxml" % forcefield) except Exception as e: print(e) raise NotImplementedError return ff def openmm_system_from_graph( g, forcefield="openff_unconstrained-2.1.1", suffix="", charge_method="nn", create_system_kwargs={}, ): """Construct an OpenMM system from `espaloma.Graph`. Parameters ---------- g : `espaloma.Graph` Input graph. forcefield : `str`, optional, default='openff_unconstrained-2.1.1' Name of the force field. Must be an Open Force Field (OFFXML) force field. (this forcefield will be used to assign nonbonded parameters, but all of its valence parameters will be overwritten) suffix : `str` Suffix for the force terms. charge_method : str, optional, default='nn' Method to use for assigning partial charges: 'nn' : Assign partial charges from the espaloma graph net model 'am1-bcc' : Allow the OpenFF toolkit to assign AM1-BCC charges using default backend 'gasteiger' : Assign Gasteiger partial charges (not recommended) 'from-molecule' : Use partial charges provided in the original `Molecule` object Returns ------- sys : `openmm.System` Constructed single-molecule OpenMM system. 
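Examples
--------
A minimal sketch (here ``espaloma_model`` stands in for a trained espaloma
model that writes the parameters this function reads, such as ``k``, ``eq``,
and ``q``; it is not defined in this module):

>>> g = esp.Graph("CCO")
>>> espaloma_model(g.heterograph)
>>> system = openmm_system_from_graph(g, charge_method="nn")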
""" ff = load_forcefield(forcefield) # get the mapping between position and indices bond_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n2"].data["idxs"]) } angle_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n3"].data["idxs"]) } if charge_method == "gasteiger": # from rdkit.Chem.AllChem import ComputeGasteigerCharges # rdkit_mol = g.mol.to_rdkit() # ComputeGasteigerCharges(rdkit_mol) # charges = [atom.GetDoubleProp("_GasteigerCharge") for atom in rdkit_mol.GetAtoms()] g.mol.assign_partial_charges("gasteiger") sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol] ) elif charge_method == "am1-bcc": g.mol.assign_partial_charges("am1bcc") sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol] ) elif charge_method == "from-molecule": sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol] ) elif charge_method == "nn": g.mol.partial_charges = unit.elementary_charge * g.nodes["n1"].data[ "q" ].flatten().detach().cpu().numpy().astype( np.float64, ) sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol], allow_nonintegral_charges=True, ) else: # create openmm system raise RuntimeError( "Charge method %s is not supported. " % charge_method ) for force in sys.getForces(): name = force.__class__.__name__ if "HarmonicBondForce" in name: assert force.getNumBonds() * 2 == g.heterograph.number_of_nodes( "n2" ) for idx in range(force.getNumBonds()): idx0, idx1, eq, k = force.getBondParameters(idx) position = bond_lookup[(idx0, idx1)] _eq = ( g.nodes["n2"] .data["eq%s" % suffix][position] .detach() .numpy() .item() ) _k = ( g.nodes["n2"] .data["k%s" % suffix][position] .detach() .numpy() .item() ) _eq = Quantity( # bond length _eq, esp.units.DISTANCE_UNIT, ).value_in_unit(OPENMM_BOND_EQ_UNIT) _k = Quantity( # bond force constant: # since everything is enumerated twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 _k, esp.units.FORCE_CONSTANT_UNIT, ).value_in_unit(OPENMM_BOND_K_UNIT) force.setBondParameters(idx, idx0, idx1, _eq, _k) if "HarmonicAngleForce" in name: assert force.getNumAngles() * 2 == g.heterograph.number_of_nodes( "n3" ) for idx in range(force.getNumAngles()): idx0, idx1, idx2, eq, k = force.getAngleParameters(idx) position = angle_lookup[(idx0, idx1, idx2)] _eq = ( g.nodes["n3"] .data["eq%s" % suffix][position] .detach() .numpy() .item() ) _k = ( g.nodes["n3"] .data["k%s" % suffix][position] .detach() .numpy() .item() ) _eq = Quantity( _eq, esp.units.ANGLE_UNIT, ).value_in_unit(OPENMM_ANGLE_EQ_UNIT) _k = Quantity( # force constant # since everything is enumerated twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 _k, esp.units.ANGLE_FORCE_CONSTANT_UNIT, ).value_in_unit(OPENMM_ANGLE_K_UNIT) force.setAngleParameters(idx, idx0, idx1, idx2, _eq, _k) if "PeriodicTorsionForce" in name: number_of_torsions = force.getNumTorsions() if ( "periodicity%s" % suffix not in g.nodes["n4"].data or "phase%s" % suffix not in g.nodes["n4"].data ): g.nodes["n4"].data["periodicity%s" % suffix] = torch.arange( 1, 7 )[None, :].repeat(g.heterograph.number_of_nodes("n4"), 1) g.nodes["n4"].data["phases%s" % suffix] = torch.zeros( g.heterograph.number_of_nodes("n4"), 6 ) g.nodes["n4_improper"].data[ "periodicity%s" % suffix ] = torch.arange(1, 7)[None, :].repeat( g.heterograph.number_of_nodes("n4_improper"), 1 ) g.nodes["n4_improper"].data[ "phases%s" % suffix ] = torch.zeros( 
g.heterograph.number_of_nodes("n4_improper"), 6 ) count_idx = 0 for idx in range(g.heterograph.number_of_nodes("n4")): idx0 = g.nodes["n4"].data["idxs"][idx, 0].item() idx1 = g.nodes["n4"].data["idxs"][idx, 1].item() idx2 = g.nodes["n4"].data["idxs"][idx, 2].item() idx3 = g.nodes["n4"].data["idxs"][idx, 3].item() # assuming both (a,b,c,d) and (d,c,b,a) are listed for every torsion, only pick one of the orderings if idx0 < idx3: periodicities = g.nodes["n4"].data[ "periodicity%s" % suffix ][idx] phases = g.nodes["n4"].data["phases%s" % suffix][idx] ks = g.nodes["n4"].data["k%s" % suffix][idx] for sub_idx in range(ks.flatten().shape[0]): k = ks[sub_idx].item() if k != 0.0: _periodicity = periodicities[sub_idx].item() _phase = phases[sub_idx].item() if k < 0: k = -k _phase = math.pi - _phase k = Quantity( k, esp.units.ENERGY_UNIT, ).value_in_unit( OPENMM_ENERGY_UNIT, ) if count_idx < number_of_torsions: force.setTorsionParameters( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 count_idx, idx0, idx1, idx2, idx3, _periodicity, _phase, k, ) else: force.addTorsion( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 idx0, idx1, idx2, idx3, _periodicity, _phase, k, ) count_idx += 1 if "k%s" % suffix in g.nodes["n4_improper"].data: for idx in range( g.heterograph.number_of_nodes("n4_improper") ): idx0 = g.nodes["n4_improper"].data["idxs"][idx, 0].item() idx1 = g.nodes["n4_improper"].data["idxs"][idx, 1].item() idx2 = g.nodes["n4_improper"].data["idxs"][idx, 2].item() idx3 = g.nodes["n4_improper"].data["idxs"][idx, 3].item() periodicities = g.nodes["n4_improper"].data[ "periodicity%s" % suffix ][idx] phases = g.nodes["n4_improper"].data["phases%s" % suffix][ idx ] ks = g.nodes["n4_improper"].data["k%s" % suffix][idx] for sub_idx in range(ks.flatten().shape[0]): k = ks[sub_idx].item() if k != 0.0: _periodicity = periodicities[sub_idx].item() _phase = phases[sub_idx].item() if k < 0: k = -k _phase = math.pi - _phase k = Quantity( k, esp.units.ENERGY_UNIT, ).value_in_unit( OPENMM_ENERGY_UNIT, ) if count_idx < number_of_torsions: force.setTorsionParameters( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 count_idx, idx0, idx1, idx2, idx3, _periodicity, _phase, 0.5 * k, ) else: force.addTorsion( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 idx0, idx1, idx2, idx3, _periodicity, _phase, 0.5 * k, ) count_idx += 1 return sys ================================================ FILE: espaloma/graphs/graph.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import io import openff.toolkit import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class BaseGraph(abc.ABC): """Base class of graph.""" def __init__(self): super(BaseGraph, self).__init__() class Graph(BaseGraph): """A unified graph object that support translation to and from message-passing graphs and MM factor graph. Methods ------- save(path) Save graph to file. load(path) Load a graph from path. Note ---- This object provides access to popular attributes of homograph and heterograph. 
This object also provides access to `ndata` and `edata` from the heterograph. Examples -------- >>> g0 = esp.Graph("C") >>> g1 = esp.Graph(Molecule.from_smiles("C")) >>> assert g0 == g1 """ def __init__(self, mol=None, homograph=None, heterograph=None): # TODO : more pythonic way allow multiple constructors: # Graph.from_smiles(...), Graph.from_mol(...), Graph.from_homograph(...), ... # rather than Graph(mol=None, homograph=None, ...) # input molecule if isinstance(mol, str): from openff.toolkit.topology import Molecule mol = Molecule.from_smiles(mol, allow_undefined_stereo=True) if mol is not None and homograph is None and heterograph is None: homograph = self.get_homograph_from_mol(mol) if homograph is not None and heterograph is None: heterograph = self.get_heterograph_from_graph_and_mol( homograph, mol ) self.mol = mol self.homograph = homograph self.heterograph = heterograph def save(self, path): import os import json import dgl os.mkdir(path) dgl.save_graphs(path + "/homograph.bin", [self.homograph]) dgl.save_graphs(path + "/heterograph.bin", [self.heterograph]) with open(path + "/mol.json", "w") as f_handle: json.dump(self.mol.to_json(), f_handle) @classmethod def load(cls, path): import json import dgl homograph = dgl.load_graphs(path + "/homograph.bin")[0][0] heterograph = dgl.load_graphs(path + "/heterograph.bin")[0][0] with open(path + "/mol.json", "r") as f_handle: mol = json.load(f_handle) from openff.toolkit.topology import Molecule # With OFF toolkit >=0.11, from_json requires the "hierarchy_schemes" key # which is not created with previous toolkit versions. That means, from_json # errors out when loading molecules that were json serialized with older # toolkit versions. try: mol = Molecule.from_json(mol) except KeyError: # this probably means hierarchy_schemes key wasn't found mol_dict = json.load(io.StringIO(mol)) if "hierarchy_schemes" not in mol_dict.keys(): mol_dict["hierarchy_schemes"] = dict() # Default to empty dict if not present mol = Molecule.from_dict(mol_dict) g = cls(mol=mol, homograph=homograph, heterograph=heterograph) return g @staticmethod def get_homograph_from_mol(mol): assert isinstance( mol, openff.toolkit.topology.Molecule ), "mol can only be OFF Molecule object." # TODO: # rewrite this using OFF-generic grammar # graph = esp.graphs.utils.read_homogeneous_graph.from_rdkit_mol( # mol.to_rdkit() # ) graph = ( esp.graphs.utils.read_homogeneous_graph.from_openff_toolkit_mol( mol ) ) return graph @staticmethod def get_heterograph_from_graph_and_mol(graph, mol): import dgl assert isinstance( graph, dgl.DGLGraph ), "graph can only be dgl Graph object." 
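        # delegate to the heterogeneous-graph reader, which enumerates
        # n2 (bond), n3 (angle), n4 (proper torsion), and n4_improper
        # hypernodes and their membership relations from the molecule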
heterograph = esp.graphs.utils.read_heterogeneous_graph.from_homogeneous_and_mol( graph, mol ) return heterograph # # @property # def mol(self): # return self._mol # # @property # def homograph(self): # return self._homograph # # @property # def heterograph(self): # return self._heterograph @property def ndata(self): return self.homograph.ndata @property def edata(self): return self.homograph.edata @property def nodes(self): return self.heterograph.nodes ================================================ FILE: espaloma/graphs/legacy_force_field.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import rdkit import torch from openff.toolkit import Molecule import espaloma as esp from openmmforcefields.generators import SystemGenerator import openmm from openmm import unit from openmm.app import Simulation from openmm.unit import Quantity # ============================================================================= # CONSTANTS # ============================================================================= REDUNDANT_TYPES = { "cd": "cc", "cf": "ce", "cq": "cp", "pd": "pc", "pf": "pe", "nd": "nc", } # simulation specs TEMPERATURE = 350 * unit.kelvin STEP_SIZE = 1.0 * unit.femtosecond COLLISION_RATE = 1.0 / unit.picosecond EPSILON_MIN = 0.05 * unit.kilojoules_per_mole # ============================================================================= # MODULE CLASSES # ============================================================================= class LegacyForceField: """Class to hold legacy forcefield for typing and parameter assignment. Parameters ---------- forcefield : string name and version of the forcefield. Methods ------- parametrize() Parametrize a molecular system. typing() Provide legacy typing for a molecular system. 
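    Examples
    --------
    A minimal sketch, mirroring the usage in the test suite::

        >>> import espaloma as esp
        >>> ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0")
        >>> g = esp.Graph("CCCC")
        >>> g = ff.parametrize(g)  # writes eq_ref, k_ref, ... onto the graph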
""" def __init__(self, forcefield="gaff-1.81"): self.forcefield = forcefield self._prepare_forcefield() @staticmethod def _convert_to_off(mol): if isinstance(mol, esp.Graph): return mol.mol elif isinstance(mol, Molecule): return mol elif isinstance(mol, rdkit.Chem.rdchem.Mol): return Molecule.from_rdkit(mol) elif "openeye" in str( type(mol) ): # because we don't want to depend on OE return Molecule.from_openeye(mol) def _prepare_forcefield(self): if "gaff" in self.forcefield: self._prepare_gaff() elif "smirnoff" in self.forcefield: # do nothing for now self._prepare_smirnoff() elif "openff" in self.forcefield: self._prepare_openff() else: raise NotImplementedError def _prepare_openff(self): from openff.toolkit import ForceField self.FF = ForceField("%s.offxml" % self.forcefield) def _prepare_smirnoff(self): from openff.toolkit import ForceField self.FF = ForceField("%s.offxml" % self.forcefield) def _prepare_gaff(self): import os import xml.etree.ElementTree as ET import openmmforcefields # get the openff.toolkits path openmmforcefields_path = os.path.dirname(openmmforcefields.__file__) # get the xml path ffxml_path = ( openmmforcefields_path + "/ffxml/amber/gaff/ffxml/" + self.forcefield + ".xml" ) # parse xml tree = ET.parse(ffxml_path) root = tree.getroot() nonbonded = root.find("NonbondedForce") atom_types = [atom.get("class") for atom in nonbonded.findall("Atom")] # remove redundant types [atom_types.remove(bad_type) for bad_type in REDUNDANT_TYPES.keys()] # compose the translation dictionaries str_2_idx = dict(zip(atom_types, range(len(atom_types)))) idx_2_str = dict(zip(range(len(atom_types)), atom_types)) # provide mapping for redundant types for bad_type, good_type in REDUNDANT_TYPES.items(): str_2_idx[bad_type] = str_2_idx[good_type] # make translation dictionaries attributes of self self._str_2_idx = str_2_idx self._idx_2_str = idx_2_str def _type_gaff(self, g): """Type a molecular graph using gaff force fields.""" # assert the forcefield is indeed of gaff family assert "gaff" in self.forcefield # make sure mol is in openff.toolkit format ` mol = g.mol # import template generator from openmmforcefields.generators import GAFFTemplateGenerator gaff = GAFFTemplateGenerator( molecules=mol, forcefield=self.forcefield ) # create temporary directory for running antechamber import os import shutil import tempfile tempdir = tempfile.mkdtemp() prefix = "molecule" input_sdf_filename = os.path.join(tempdir, prefix + ".sdf") gaff_mol2_filename = os.path.join(tempdir, prefix + ".gaff.mol2") frcmod_filename = os.path.join(tempdir, prefix + ".frcmod") # write sdf for input mol.to_file(input_sdf_filename, file_format="sdf") # run antechamber gaff._run_antechamber( molecule_filename=input_sdf_filename, input_format="mdl", gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename, ) gaff._read_gaff_atom_types_from_mol2(gaff_mol2_filename, mol) gaff_types = [atom.gaff_type for atom in mol.atoms] shutil.rmtree(tempdir) # put types into graph object if g is None: g = esp.Graph(mol) g.nodes["n1"].data["legacy_typing"] = torch.tensor( [self._str_2_idx[atom] for atom in gaff_types] ) return g def _parametrize_gaff(self, g, n_max_phases=6): from openmmforcefields.generators import SystemGenerator # define a system generator system_generator = SystemGenerator( small_molecule_forcefield=self.forcefield, ) mol = g.mol # mol.assign_partial_charges("formal_charge") # create system sys = system_generator.create_system( topology=mol.to_topology().to_openmm(), molecules=mol, ) bond_lookup = { 
tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n2"].data["idxs"]) } angle_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n3"].data["idxs"]) } torsion_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n4"].data["idxs"]) } improper_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate( g.nodes["n4_improper"].data["idxs"] ) } torsion_phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) torsion_periodicities = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) torsion_ks = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) improper_phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) improper_periodicities = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) improper_ks = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) for force in sys.getForces(): name = force.__class__.__name__ if "HarmonicBondForce" in name: assert ( force.getNumBonds() * 2 == g.heterograph.number_of_nodes("n2") ) g.nodes["n2"].data["eq_ref"] = torch.zeros( force.getNumBonds() * 2, 1 ) g.nodes["n2"].data["k_ref"] = torch.zeros( force.getNumBonds() * 2, 1 ) for idx in range(force.getNumBonds()): idx0, idx1, eq, k = force.getBondParameters(idx) position = bond_lookup[(idx0, idx1)] g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) position = bond_lookup[(idx1, idx0)] g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) if "HarmonicAngleForce" in name: assert ( force.getNumAngles() * 2 == g.heterograph.number_of_nodes("n3") ) g.nodes["n3"].data["eq_ref"] = torch.zeros( force.getNumAngles() * 2, 1 ) g.nodes["n3"].data["k_ref"] = torch.zeros( force.getNumAngles() * 2, 1 ) for idx in range(force.getNumAngles()): idx0, idx1, idx2, eq, k = force.getAngleParameters(idx) position = angle_lookup[(idx0, idx1, idx2)] g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) position = angle_lookup[(idx2, idx1, idx0)] g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) if "PeriodicTorsionForce" in name: for idx in range(force.getNumTorsions()): ( idx0, idx1, idx2, idx3, periodicity, phase, k, ) = force.getTorsionParameters(idx) if (idx0, idx1, idx2, idx3) in torsion_lookup: position = torsion_lookup[(idx0, idx1, idx2, idx3)] for sub_idx in range(n_max_phases): if torsion_ks[position, sub_idx] == 0: torsion_ks[ position, sub_idx ] = 0.5 * k.value_in_unit( esp.units.ENERGY_UNIT ) torsion_phases[ position, sub_idx ] = phase.value_in_unit(esp.units.ANGLE_UNIT) torsion_periodicities[ position, sub_idx ] = periodicity position = torsion_lookup[ (idx3, idx2, idx1, idx0) ] torsion_ks[ position, sub_idx ] = 0.5 * k.value_in_unit( esp.units.ENERGY_UNIT ) torsion_phases[ position, sub_idx ] = phase.value_in_unit(esp.units.ANGLE_UNIT) torsion_periodicities[ position, sub_idx ] = periodicity break g.heterograph.apply_nodes( lambda nodes: { "k_ref": torsion_ks, "periodicity_ref": torsion_periodicities, "phases_ref": 
torsion_phases, }, ntype="n4", ) """ g.heterograph.apply_nodes( lambda nodes: { "k_ref": improper_ks, "periodicity_ref": improper_periodicities, "phases_ref": improper_phases, }, ntype="n4_improper" ) """ """ def apply_torsion(node, n_max_phases=6): phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) periodicity = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) k = torch.zeros(g.heterograph.number_of_nodes("n4"), n_max_phases,) for idx in range(g.heterograph.number_of_nodes("n4")): idxs = tuple(node.data["idxs"][idx].numpy()) if idxs in force: _force = force[idxs] for sub_idx in range(len(_force.periodicity)): if hasattr(_force, "k%s" % sub_idx): k[idx, sub_idx] = getattr( _force, "k%s" % sub_idx ).value_in_unit(esp.units.ENERGY_UNIT) phases[idx, sub_idx] = getattr( _force, "phase%s" % sub_idx ).value_in_unit(esp.units.ANGLE_UNIT) periodicity[idx, sub_idx] = getattr( _force, "periodicity%s" % sub_idx ) return { "k_ref": k, "periodicity_ref": periodicity, "phases_ref": phases, } g.heterograph.apply_nodes(apply_torsion, ntype="n4") """ return g def _parametrize_smirnoff(self, g): from openff.units import unit as openff_unit OPENFF_FORCE_CONSTANT_UNIT = openff_unit forces = self.FF.label_molecules(g.mol.to_topology())[0] g.heterograph.apply_nodes( lambda node: { "k_ref": 2.0 * torch.Tensor( [ forces["Bonds"][ tuple(node.data["idxs"][idx].numpy()) ].k.to_openmm().value_in_unit(esp.units.FORCE_CONSTANT_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n2", ) g.heterograph.apply_nodes( lambda node: { "eq_ref": torch.Tensor( [ forces["Bonds"][ tuple(node.data["idxs"][idx].numpy()) ].length.to_openmm().value_in_unit(esp.units.DISTANCE_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n2", ) g.heterograph.apply_nodes( lambda node: { "k_ref": 2.0 * torch.Tensor( # OpenFF records 1/2k as param [ forces["Angles"][ tuple(node.data["idxs"][idx].numpy()) ].k.to_openmm().value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n3", ) g.heterograph.apply_nodes( lambda node: { "eq_ref": torch.Tensor( [ forces["Angles"][ tuple(node.data["idxs"][idx].numpy()) ].angle.to_openmm().value_in_unit(esp.units.ANGLE_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n3", ) g.heterograph.apply_nodes( lambda node: { "epsilon_ref": torch.Tensor( [ forces["vdW"][(idx,)].epsilon.to_openmm().value_in_unit( esp.units.ENERGY_UNIT ) for idx in range(g.heterograph.number_of_nodes("n1")) ] )[:, None] }, ntype="n1", ) g.heterograph.apply_nodes( lambda node: { "sigma_ref": torch.Tensor( [ forces["vdW"][(idx,)].rmin_half.to_openmm().value_in_unit( esp.units.DISTANCE_UNIT ) for idx in range(g.heterograph.number_of_nodes("n1")) ] )[:, None] }, ntype="n1", ) def apply_torsion(node, n_max_phases=6): phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) periodicity = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) k = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) force = forces["ProperTorsions"] for idx in range(g.heterograph.number_of_nodes("n4")): idxs = tuple(node.data["idxs"][idx].numpy()) if idxs in force: _force = force[idxs] for sub_idx in range(len(_force.periodicity)): if hasattr(_force, "k%s" % sub_idx): k[idx, sub_idx] = getattr( _force, "k%s" % sub_idx ).to_openmm().value_in_unit(esp.units.ENERGY_UNIT) phases[idx, sub_idx] = getattr( _force, "phase%s" % sub_idx 
).to_openmm().value_in_unit(esp.units.ANGLE_UNIT) periodicity[idx, sub_idx] = getattr( _force, "periodicity%s" % sub_idx ) return { "k_ref": k, "periodicity_ref": periodicity, "phases_ref": phases, } def apply_improper_torsion(node, n_max_phases=6): phases = torch.zeros( g.heterograph.number_of_nodes("n4_improper"), n_max_phases, ) periodicity = torch.zeros( g.heterograph.number_of_nodes("n4_improper"), n_max_phases, ) k = torch.zeros( g.heterograph.number_of_nodes("n4_improper"), n_max_phases, ) force = forces["ImproperTorsions"] for idx in range(g.heterograph.number_of_nodes("n4_improper")): idxs = tuple(node.data["idxs"][idx].numpy()) if idxs in force: _force = force[idxs] for sub_idx in range(len(_force.periodicity)): if hasattr(_force, "k%s" % sub_idx): k[idx, sub_idx] = getattr( _force, "k%s" % sub_idx ).to_openmm().value_in_unit(esp.units.ENERGY_UNIT) phases[idx, sub_idx] = getattr( _force, "phase%s" % sub_idx ).to_openmm().value_in_unit(esp.units.ANGLE_UNIT) periodicity[idx, sub_idx] = getattr( _force, "periodicity%s" % sub_idx ) return { "k_ref": k, "periodicity_ref": periodicity, "phases_ref": phases, } g.heterograph.apply_nodes(apply_torsion, ntype="n4") g.heterograph.apply_nodes(apply_improper_torsion, ntype="n4_improper") return g def baseline_energy(self, g, suffix=None): if suffix is None: suffix = "_" + self.forcefield from openmmforcefields.generators import SystemGenerator # define a system generator system_generator = SystemGenerator( small_molecule_forcefield=self.forcefield, ) mol = g.mol # mol.assign_partial_charges("formal_charge") # create system system = system_generator.create_system( topology=mol.to_topology().to_openmm(), molecules=mol, ) # parameterize topology topology = g.mol.to_topology().to_openmm() integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) us = [] xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) for x in xs: simulation.context.setPositions(x) us.append( simulation.context.getState(getEnergy=True) .getPotentialEnergy() .value_in_unit(esp.units.ENERGY_UNIT) ) g.nodes["g"].data["u%s" % suffix] = torch.tensor(us)[None, :] return g def _multi_typing_smirnoff(self, g): # mol = self._convert_to_off(mol) forces = self.FF.label_molecules(g.mol.to_topology())[0] g.heterograph.apply_nodes( lambda node: { "legacy_typing": torch.Tensor( [ int( forces["Bonds"][ tuple(node.data["idxs"][idx].numpy()) ].id[1:] ) for idx in range(node.data["idxs"].shape[0]) ] ).long() }, ntype="n2", ) g.heterograph.apply_nodes( lambda node: { "legacy_typing": torch.Tensor( [ int( forces["Angles"][ tuple(node.data["idxs"][idx].numpy()) ].id[1:] ) for idx in range(node.data["idxs"].shape[0]) ] ).long() }, ntype="n3", ) g.heterograph.apply_nodes( lambda node: { "legacy_typing": torch.Tensor( [ int(forces["vdW"][(idx,)].id[1:]) for idx in range(g.heterograph.number_of_nodes("n1")) ] ).long() }, ntype="n1", ) return g def parametrize(self, g): """Parametrize a molecular graph.""" if "smirnoff" in self.forcefield or "openff" in self.forcefield: return self._parametrize_smirnoff(g) elif "gaff" in self.forcefield: return self._parametrize_gaff(g) else: raise NotImplementedError def typing(self, g): """Type a molecular graph.""" if "gaff" in self.forcefield: return self._type_gaff(g) else: raise NotImplementedError def multi_typing(self, g): """Type a molecular graph 
for hetero nodes.""" if "smirnoff" in self.forcefield: return self._multi_typing_smirnoff(g) else: raise NotImplementedError def __call__(self, *args, **kwargs): return self.typing(*args, **kwargs) ================================================ FILE: espaloma/graphs/tests/test_deploy.py ================================================ import openmm import urllib.request import numpy.testing as npt import espaloma as esp from openmm import unit omm_angle_unit = unit.radian omm_energy_unit = unit.kilojoule_per_mole from openmm.unit import Quantity def test_butane_charge_am1bcc(): """check that esp.graphs.deploy.openmm_system_from_graph runs without error on butane using am1-bcc charge method""" ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CCCC") g = ff.parametrize(g) esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="am1-bcc") def test_butane_charge_nn(): """check that esp.graphs.deploy.openmm_system_from_graph runs without error on butane using the nn charge method""" import torch # Download serialized espaloma model url = f'https://github.com/choderalab/espaloma/releases/download/0.3.0/espaloma-0.3.0rc1.pt' espaloma_model_filepath = f'espaloma-0.3.0rc1.pt' urllib.request.urlretrieve(url, filename=espaloma_model_filepath) # Test deployment ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CCCC") g = ff.parametrize(g) # apply a trained espaloma model to assign parameters net = torch.load(espaloma_model_filepath, map_location=torch.device('cpu')) net.eval() net(g.heterograph) esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="nn") def test_caffeine(): """Test Openmm system deployment of caffeine method using the charges from the molecule runs without error.""" ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") g = ff.parametrize(g) g.mol.assign_partial_charges("am1bcc") # Assign charges after parametrizing esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="from-molecule") def test_parameter_consistent_caffeine(): ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") g = ff.parametrize(g) system = esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="am1-bcc") forces = list(system.getForces()) openff_forces = ff.FF.label_molecules(g.mol.to_topology())[0] for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "HarmonicBondForce" in name: for _idx in range(force.getNumBonds()): start, end, eq, k_openmm = force.getBondParameters(_idx) k_openff = openff_forces["Bonds"][(start, end)].k.to_openmm() npt.assert_almost_equal( k_openmm / k_openff, 2.0, decimal=3, ) def test_energy_consistent_caffeine(): """Deploy a caffeine molecule parametrized by a traditional force field and deployed by espaloma, make sure the energies computed using espaloma and OpenMM are same or close. """ # grab a force field ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") # parametrize caffeine molecule using the parametrization ## Should there be a second test for SMIRNOFF impropers? 
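    # test strategy: deploy the parametrized graph to an OpenMM system, then
    # evaluate per-term (bond / angle / torsion) energies at random
    # coordinates with both espaloma and OpenMM and compare them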
g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") g = ff.parametrize(g) system = esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="am1-bcc") # compute energies using espaloma import torch g.nodes["n1"].data["xyz"] = torch.randn( g.heterograph.number_of_nodes("n1"), 1, 3 ) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph( g.heterograph, terms=["n2", "n3", "n4", "n4_improper"], suffix="_ref" ) # compute energies using OpenMM with bond, angle, and torsion breakdown forces = list(system.getForces()) energies = {} for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) # epsilons = {} # sigmas = {} # for _idx in range(force.getNumParticles()): # q, sigma, epsilon = force.getParticleParameters(_idx) # # record parameters # epsilons[_idx] = epsilon # sigmas[_idx] = sigma # force.setParticleParameters(_idx, 0., sigma, epsilon) # def sigma_combining_rule(sig1, sig2): # return (sig1 + sig2) / 2 # def eps_combining_rule(eps1, eps2): # return np.sqrt(np.abs(eps1 * eps2)) # for _idx in range(force.getNumExceptions()): # idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( # _idx) # force.setExceptionParameters( # _idx, # idx0, # idx1, # 0.0, # sigma_combining_rule(sigmas[idx0], sigmas[idx1]), # eps_combining_rule(epsilons[idx0], epsilons[idx1]) # ) # force.updateParametersInContext(_simulation.context) # create new simulation _simulation = openmm.app.Simulation( g.mol.to_topology().to_openmm(), system, openmm.VerletIntegrator(0.0), ) _simulation.context.setPositions( Quantity( g.nodes["n1"].data["xyz"][:, 0, :].numpy(), unit=esp.units.DISTANCE_UNIT, ).value_in_unit(unit.nanometer) ) for idx, force in enumerate(forces): name = force.__class__.__name__ state = _simulation.context.getState( getEnergy=True, getParameters=True, groups=2**idx, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) energies[name] = energy # test if bond energies are equal npt.assert_almost_equal( g.nodes["g"].data["u_n2_ref"].numpy(), energies["HarmonicBondForce"], decimal=3, ) # test if angle energies are equal npt.assert_almost_equal( g.nodes["g"].data["u_n3_ref"].numpy(), energies["HarmonicAngleForce"], decimal=3, ) # test if torsion energies are equal npt.assert_almost_equal( g.nodes["g"].data["u_n4_ref"].numpy() + g.nodes["g"].data["u_n4_improper_ref"].numpy(), energies["PeriodicTorsionForce"], decimal=3, ) # TODO: test that desired parameters are assigned ================================================ FILE: espaloma/graphs/tests/test_gaff_parametrize.py ================================================ import pytest import espaloma as esp def test_gaff_parametrize(): ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = esp.Graph( "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", ) ff.parametrize(g) print(g.nodes["n2"].data) print(g.nodes["n3"].data) print(g.nodes["n4"].data) print(g.nodes["n4_improper"].data) ================================================ FILE: espaloma/graphs/tests/test_graph.py ================================================ import io import json import pytest import shutil import importlib_resources import espaloma as esp def test_graph(): import espaloma as esp g = esp.Graph("c1ccccc1") print(g.heterograph) @pytest.fixture def graph(): import espaloma as esp return esp.Graph("c1ccccc1") def test_ndata_consistency(graph): import torch assert torch.equal(graph.ndata["h0"], 
graph.nodes["n1"].data["h0"]) @pytest.mark.parametrize( "molecule, charge", [ pytest.param("C", 0, id="methane"), pytest.param("[NH4+]", 1, id="Ammonium"), pytest.param("CC(=O)[O-]", -1, id="Acetate"), ], ) def test_formal_charge(molecule, charge): import espaloma as esp graph = esp.Graph(molecule) assert graph.nodes["g"].data["sum_q"].numpy()[0] == charge def test_save_and_load(graph): import tempfile with tempfile.TemporaryDirectory() as tempdir: graph.save(tempdir + "/g.esp") new_graph = esp.Graph.load(tempdir + "/g.esp") assert graph.homograph.number_of_nodes() == new_graph.homograph.number_of_nodes() assert graph.homograph.number_of_edges() == new_graph.homograph.number_of_edges() def test_load_from_older_openff(tmp_path_factory): """Tests creating a graph from a json-serialized mol with older openff-toolkit version (0.10.x) This checks that the serialized molecule doesn't have the expected hierarchy_schemes key, which will be created on the fly when loaded as a graph. This tests creates a graph with """ # Load json serialized off 0.10.6 molecule and save it in path from openff.toolkit import Molecule mol_json_path = importlib_resources.files('espaloma.data') / 'off-mol_0_10_6.json' with open(str(mol_json_path), "r") as json_file: # This loads it as a string -- seems like an off toolkit limitation mol_json_str = json.load(json_file) mol_dict = json.load(io.StringIO(mol_json_str)) assert "hierarchy_schemes" not in mol_dict, "Serialized json mol contains unexpected key." # Save json molecule in path out_esp_dir_1 = tmp_path_factory.mktemp("esp1") shutil.copy(mol_json_path, out_esp_dir_1 / "mol.json") # update dicitonary and create espaloma graph with the same molecule mol_dict["hierarchy_schemes"] = dict() off_molecule = Molecule.from_dict(mol_dict) smiles = off_molecule.to_smiles() g = esp.Graph(smiles) # Save the graph out_esp_dir_2 = tmp_path_factory.mktemp("esp2") / "esp-test" g.save(str(out_esp_dir_2)) # copy homo/hetero-graphs to original dir shutil.copy(out_esp_dir_2 / "homograph.bin", out_esp_dir_1) shutil.copy(out_esp_dir_2 / "heterograph.bin", out_esp_dir_1) # Load espaloma from original directory -- with mol serialized from off 0.10.6 esp_graph = esp.Graph.load(str(out_esp_dir_1)) assert esp_graph.mol == g.mol, f"Read molecule from esp graph, {esp_graph.mol} is not " \ f"the same as the expected molecule {off_molecule}." 
# TODO: test offmol_indices
# TODO: test relationship_indices_from_offmol


================================================
FILE: espaloma/graphs/tests/test_smirnoff.py
================================================
import pytest

import espaloma as esp


def test_smirnoff_esol_first():
    ff = esp.graphs.legacy_force_field.LegacyForceField(
        "smirnoff99Frosst-1.1.0"
    )
    g = esp.data.esol(first=1)[0]
    g = ff.parametrize(g)


# def test_smirnoff_strange_mol():
#     ff = esp.graphs.legacy_force_field.LegacyForceField("smirnoff99Frosst-1.1.0")
#     g = esp.Graph(
#         "[H]c1c(nc(n(=O)c1N([H])[H])N([H])[H])N2C(C(C(C(C2([H])[H])([H])[H])([H])[H])([H])[H])([H])[H]"
#     )
#     g = ff.parametrize(g)
#
#
# def test_multi_typing():
#     ff = esp.graphs.legacy_force_field.LegacyForceField("smirnoff99Frosst-1.1.0")
#     g = esp.data.esol(first=1)[0]
#     g = ff.multi_typing(g)


================================================
FILE: espaloma/graphs/utils/__init__.py
================================================
import espaloma.graphs.utils.read_heterogeneous_graph
import espaloma.graphs.utils.read_homogeneous_graph


================================================
FILE: espaloma/graphs/utils/offmol_indices.py
================================================
import numpy as np
from openff.toolkit.topology import Molecule


def atom_indices(offmol: Molecule) -> np.ndarray:
    return np.array([a.molecule_atom_index for a in offmol.atoms])


def bond_indices(offmol: Molecule) -> np.ndarray:
    return np.array([(b.atom1_index, b.atom2_index) for b in offmol.bonds])


def angle_indices(offmol: Molecule) -> np.ndarray:
    return np.array(
        sorted(
            [
                tuple([atom.molecule_atom_index for atom in angle])
                for angle in offmol.angles
            ]
        )
    )


def proper_torsion_indices(offmol: Molecule) -> np.ndarray:
    return np.array(
        sorted(
            [
                tuple([atom.molecule_atom_index for atom in proper])
                for proper in offmol.propers
            ]
        )
    )


def _all_improper_torsion_indices(offmol: Molecule) -> np.ndarray:
    """ "[*:1]~[*:2](~[*:3])~[*:4]" matches"""
    return np.array(
        sorted(
            [
                tuple([atom.molecule_atom_index for atom in improper])
                for improper in offmol.impropers
            ]
        )
    )


def improper_torsion_indices(
    offmol: Molecule, improper_def="espaloma"
) -> np.ndarray:
    """ "[*:1]~[X3:2](~[*:3])~[*:4]" matches
    (_all_improper_torsion_indices returns "[*:1]~[*:2](~[*:3])~[*:4]" matches)

    improper_def allows for choosing which atom will be the central atom
    in the permutations:
        smirnoff: central atom is listed first
        espaloma: central atom is listed second

    Additionally, for smirnoff, only take the subset of atoms that
    corresponds to the ccw traversal of connected atoms.

    Notes
    -----
    Motivation: offmol.impropers returns a large number of impropers, and
    we may wish to restrict this number.

    May update this filter definition based on discussion in
    https://github.com/openff.toolkit/openff.toolkit/issues/746
    """

    ## Find all atoms bound to exactly 3 other atoms
    if improper_def == "espaloma":
        ## This finds all orderings, which is what we want for the espaloma
        ## case but not for smirnoff
        improper_smarts = "[*:1]~[X3:2](~[*:3])~[*:4]"
        mol_idxs = offmol.chemical_environment_matches(improper_smarts)
        return np.array(mol_idxs)

    elif improper_def == "smirnoff":
        improper_smarts = "[*:2]~[X3:1](~[*:3])~[*:4]"
        ## For smirnoff ordering, we only want to find the unique combinations
        ## of atoms forming impropers so we can permute them the way we want
        mol_idxs = offmol.chemical_environment_matches(
            improper_smarts, unique=True
        )

        ## Get all ccw orderings
        # feels like there should be some good way to do this with itertools...
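        # (a comprehension-based sketch of the same cyclic enumeration,
        #  untested, left here as a comment for reference; it should be
        #  equivalent to the loop below:)
        #
        # idx_permuts = [
        #     (c, *(other_atoms[(i + j) % 3] for j in range(3)))
        #     for c, *other_atoms in mol_idxs
        #     for i in range(3)
        # ]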
idx_permuts = [] for c, *other_atoms in mol_idxs: for i in range(3): idx = [c] for j in range(3): idx.append(other_atoms[(i + j) % 3]) idx_permuts.append(tuple(idx)) return np.array(idx_permuts) else: raise ValueError(f"Unknown value for improper_def: {improper_def}") ================================================ FILE: espaloma/graphs/utils/read_heterogeneous_graph.py ================================================ """ Build heterogeneous graph from homogeneous ones. """ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import torch from espaloma.graphs.utils import offmol_indices from openff.toolkit.topology import Molecule from typing import Dict # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def duplicate_index_ordering(indices: np.ndarray) -> np.ndarray: """For every (a,b,c,d) add a (d,c,b,a) TODO: is there a way to avoid this duplication? >>> indices = np.array([[0, 1, 2, 3], [1, 2, 3, 4]]) >>> duplicate_index_ordering(indices) array([[0, 1, 2, 3], [1, 2, 3, 4], [3, 2, 1, 0], [4, 3, 2, 1]]) """ return np.concatenate([indices, np.flip(indices, axis=-1)], axis=0) def relationship_indices_from_offmol( offmol: Molecule, ) -> Dict[str, torch.Tensor]: """Construct a dictionary that maps node names (like "n2") to torch tensors of indices Notes ----- * introduces 2x redundant indices (including (d,c,b,a) for every (a,b,c,d)) for compatibility with later processing """ idxs = dict() idxs["n1"] = offmol_indices.atom_indices(offmol) idxs["n2"] = offmol_indices.bond_indices(offmol) idxs["n3"] = offmol_indices.angle_indices(offmol) idxs["n4"] = offmol_indices.proper_torsion_indices(offmol) idxs["n4_improper"] = offmol_indices.improper_torsion_indices(offmol) if len(idxs["n4"]) == 0: idxs["n4"] = np.empty((0, 4)) if len(idxs["n4_improper"]) == 0: idxs["n4_improper"] = np.empty((0, 4)) # TODO: enumerate indices for coupling-term nodes also # TODO: big refactor of term names from "n4" to "proper_torsion", "improper_torsion", "angle_angle_coupling", etc. # TODO (discuss with YW) : I think "n1" and "n4_improper" shouldn't be 2x redundant in current scheme # (also, unclear why we need "n2", "n3", "n4" to be 2x redundant, but that's something to consider changing later) for key in ["n2", "n3", "n4"]: idxs[key] = duplicate_index_ordering(idxs[key]) # make them all torch.Tensors for key in idxs: idxs[key] = torch.from_numpy(idxs[key]) return idxs def from_homogeneous_and_mol(g, offmol): r"""Build heterogeneous graph from homogeneous ones. Note ---- For now we name single node, two-, three, and four-, hypernodes as `n1`, `n2`, `n3`, and `n4`. These correspond to atom, bond, angle, and torsion nodes in chemical graphs. Parameters ---------- g : `espaloma.HomogeneousGraph` object the homogeneous graph to be translated. Returns ------- hg : `espaloma.HeterogeneousGraph` object the resulting heterogeneous graph. 
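    Examples
    --------
    Normally reached through the ``esp.Graph`` constructor; a direct call
    looks like this sketch::

        >>> from openff.toolkit.topology import Molecule
        >>> import espaloma as esp
        >>> offmol = Molecule.from_smiles("CC")
        >>> homograph = esp.Graph.get_homograph_from_mol(offmol)
        >>> hg = from_homogeneous_and_mol(homograph, offmol)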
""" # initialize empty dictionary hg = {} # get adjacency matrix a = g.adjacency_matrix() # get all the indices idxs = relationship_indices_from_offmol(offmol) # make them all numpy idxs = {key: value.numpy() for key, value in idxs.items()} # also include n1 idxs["n1"] = np.arange(g.number_of_nodes())[:, None] # ========================= # neighboring relationships # ========================= # NOTE: # here we only define the neighboring relationship # on atom level hg[("n1", "n1_neighbors_n1", "n1")] = idxs["n2"] # build a mapping between indices and the ordering idxs_to_ordering = {} for term in ["n1", "n2", "n3", "n4", "n4_improper"]: idxs_to_ordering[term] = { tuple(subgraph_idxs): ordering for (ordering, subgraph_idxs) in enumerate(list(idxs[term])) } # =============================================== # relationships between nodes of different levels # =============================================== # NOTE: # here we define all the possible # 'has' and 'in' relationships. # TODO: # we'll test later to see if this adds too much overhead # for small_idx in range(1, 5): for big_idx in range(small_idx + 1, 5): for pos_idx in range(big_idx - small_idx + 1): hg[ ( "n%s" % small_idx, "n%s_as_%s_in_n%s" % (small_idx, pos_idx, big_idx), "n%s" % big_idx, ) ] = np.stack( [ np.array( [ idxs_to_ordering["n%s" % small_idx][tuple(x)] for x in idxs["n%s" % big_idx][ :, pos_idx : pos_idx + small_idx ] ] ), np.arange(idxs["n%s" % big_idx].shape[0]), ], axis=1, ) hg[ ( "n%s" % big_idx, "n%s_has_%s_n%s" % (big_idx, pos_idx, small_idx), "n%s" % small_idx, ) ] = np.stack( [ np.arange(idxs["n%s" % big_idx].shape[0]), np.array( [ idxs_to_ordering["n%s" % small_idx][tuple(x)] for x in idxs["n%s" % big_idx][ :, pos_idx : pos_idx + small_idx ] ] ), ], axis=1, ) # ====================================== # nonbonded terms # ====================================== # NOTE: everything is counted twice here # nonbonded is where # $A = AA = AAA = AAAA = 0$ # make dense a_ = a.to_dense().detach().numpy() idxs["nonbonded"] = np.stack( np.where(np.equal(a_ + a_ @ a_ + a_ @ a_ @ a_, 0.0)), axis=-1, ) # onefour is the two ends of torsion # idxs["onefour"] = np.stack( # [ # idxs["n4"][:, 0], # idxs["n4"][:, 3], # ], # axis=1, # ) idxs["onefour"] = np.stack( np.where( np.equal(a_ + a_ @ a_, 0.0) * np.greater(a_ @ a_ @ a_, 0.0), ), axis=-1, ) # membership for term in ["nonbonded", "onefour"]: for pos_idx in [0, 1]: hg[(term, "%s_has_%s_n1" % (term, pos_idx), "n1")] = np.stack( [np.arange(idxs[term].shape[0]), idxs[term][:, pos_idx]], axis=-1, ) hg[("n1", "n1_as_%s_in_%s" % (pos_idx, term), term)] = np.stack( [ idxs[term][:, pos_idx], np.arange(idxs[term].shape[0]), ], axis=-1, ) # membership of n1 in n4_improper for term in ["n4_improper"]: for pos_idx in [0, 1, 2, 3]: hg[(term, "%s_has_%s_n1" % (term, pos_idx), "n1")] = np.stack( [np.arange(idxs[term].shape[0]), idxs[term][:, pos_idx]], axis=-1, ) hg[("n1", "n1_as_%s_in_%s" % (pos_idx, term), term)] = np.stack( [ idxs[term][:, pos_idx], np.arange(idxs[term].shape[0]), ], axis=-1, ) # ====================================== # relationships between nodes and graphs # ====================================== for term in [ "n1", "n2", "n3", "n4", "n4_improper", "nonbonded", "onefour", ]: hg[(term, "%s_in_g" % term, "g",)] = np.stack( [np.arange(len(idxs[term])), np.zeros(len(idxs[term]))], axis=1, ) hg[("g", "g_has_%s" % term, term)] = np.stack( [ np.zeros(len(idxs[term])), np.arange(len(idxs[term])), ], axis=1, ) import dgl hg = dgl.heterograph( {key: value.astype(np.int32).tolist() 
for key, value in hg.items()} ) hg.nodes["n1"].data["h0"] = g.ndata["h0"] hg.nodes["g"].data["sum_q"] = g.ndata["sum_q"][0].reshape(1, 1) # include indices in the nodes themselves for term in [ "n1", "n2", "n3", "n4", "n4_improper", "onefour", "nonbonded", ]: hg.nodes[term].data["idxs"] = torch.tensor(idxs[term]) return hg ================================================ FILE: espaloma/graphs/utils/read_homogeneous_graph.py ================================================ """ Build simple graph from OpenEye or RDKit molecule object. """ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def fp_oe(atom): from openeye import oechem HYBRIDIZATION_OE = { oechem.OEHybridization_sp: torch.tensor( [1, 0, 0, 0, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp2: torch.tensor( [0, 1, 0, 0, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp3: torch.tensor( [0, 0, 1, 0, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp3d: torch.tensor( [0, 0, 0, 1, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp3d2: torch.tensor( [0, 0, 0, 0, 1], dtype=torch.get_default_dtype() ), oechem.OEHybridization_Unknown: torch.tensor( [0, 0, 0, 0, 0], dtype=torch.get_default_dtype() ), } return torch.cat( [ torch.tensor( [ atom.GetDegree(), # Note: Discard resonance-variant features # Issue related to https://github.com/choderalab/espaloma_charge/issues/18 # atom.GetValence(), # atom.GetExplicitValence(), # atom.GetFormalCharge(), atom.IsAromatic() * 1.0, atom.GetIsotope(), # TODO: is this a good idea? 
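                    # binary flags for membership in rings of size 3 through 8: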
oechem.OEAtomIsInRingSize(atom, 3) * 1.0, oechem.OEAtomIsInRingSize(atom, 4) * 1.0, oechem.OEAtomIsInRingSize(atom, 5) * 1.0, oechem.OEAtomIsInRingSize(atom, 6) * 1.0, oechem.OEAtomIsInRingSize(atom, 7) * 1.0, oechem.OEAtomIsInRingSize(atom, 8) * 1.0, ], dtype=torch.float32, ), HYBRIDIZATION_OE[atom.GetHyb()], ], dim=0, ) def fp_rdkit(atom): from rdkit import Chem HYBRIDIZATION_RDKIT = { Chem.rdchem.HybridizationType.SP: torch.tensor( [1, 0, 0, 0, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP2: torch.tensor( [0, 1, 0, 0, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP3: torch.tensor( [0, 0, 1, 0, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP3D: torch.tensor( [0, 0, 0, 1, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP3D2: torch.tensor( [0, 0, 0, 0, 1], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.S: torch.tensor( [0, 0, 0, 0, 0], dtype=torch.get_default_dtype(), ), } return torch.cat( [ torch.tensor( [ atom.GetTotalDegree(), # Note: Discard resonance-variant features # Issue related to https://github.com/choderalab/espaloma_charge/issues/18 # atom.GetTotalValence(), # atom.GetExplicitValence(), # atom.GetFormalCharge(), atom.GetIsAromatic() * 1.0, atom.GetMass(), atom.IsInRingSize(3) * 1.0, atom.IsInRingSize(4) * 1.0, atom.IsInRingSize(5) * 1.0, atom.IsInRingSize(6) * 1.0, atom.IsInRingSize(7) * 1.0, atom.IsInRingSize(8) * 1.0, ], dtype=torch.get_default_dtype(), ), HYBRIDIZATION_RDKIT[atom.GetHybridization()], ], dim=0, ) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def from_openff_toolkit_mol(mol, use_fp=True): import dgl # initialize graph g = dgl.DGLGraph() # enter nodes n_atoms = mol.n_atoms g.add_nodes(n_atoms) g.ndata["type"] = torch.Tensor( [[atom.atomic_number] for atom in mol.atoms] ) total_charge = mol.total_charge.magnitude g.ndata["sum_q"] = torch.Tensor( [[total_charge] for _ in range(mol.n_atoms)] ) h_v = torch.zeros( g.ndata["type"].shape[0], 100, dtype=torch.get_default_dtype() ) h_v[ torch.arange(g.ndata["type"].shape[0]), torch.squeeze(g.ndata["type"]).long(), ] = 1.0 h_v_fp = torch.stack( [fp_rdkit(atom) for atom in mol.to_rdkit().GetAtoms()], axis=0 ) if use_fp == True: h_v = torch.cat([h_v, h_v_fp], dim=-1) # (n_atoms, 117) g.ndata["h0"] = h_v # enter bonds bonds = list(mol.bonds) bonds_begin_idxs = [bond.atom1_index for bond in bonds] bonds_end_idxs = [bond.atom2_index for bond in bonds] bonds_types = [bond.bond_order for bond in bonds] # NOTE: dgl edges are directional g.add_edges(bonds_begin_idxs, bonds_end_idxs) g.add_edges(bonds_end_idxs, bonds_begin_idxs) # g.edata["type"] = torch.Tensor(bonds_types)[:, None].repeat(2, 1) return g def from_oemol(mol, use_fp=True): import dgl # initialize graph g = dgl.DGLGraph() # enter nodes n_atoms = mol.NumAtoms() g.add_nodes(n_atoms) g.ndata["type"] = torch.Tensor( [[atom.GetAtomicNum()] for atom in mol.GetAtoms()] ) g.ndata["formal_charge"] = torch.Tensor( [[atom.GetFormalCharge()] for atom in mol.GetAtoms()] ) h_v = torch.zeros(g.ndata["type"].shape[0], 100, dtype=torch.float32) h_v[ torch.arange(g.ndata["type"].shape[0]), torch.squeeze(g.ndata["type"]).long(), ] = 1.0 h_v_fp = torch.stack([fp_oe(atom) for atom in mol.GetAtoms()], axis=0) if use_fp == True: h_v = torch.cat([h_v, h_v_fp], dim=-1) # (n_atoms, 117) g.ndata["h0"] = h_v # enter bonds bonds = 
list(mol.GetBonds()) bonds_begin_idxs = [bond.GetBgnIdx() for bond in bonds] bonds_end_idxs = [bond.GetEndIdx() for bond in bonds] bonds_types = [bond.GetOrder() for bond in bonds] # NOTE: dgl edges are directional g.add_edges(bonds_begin_idxs, bonds_end_idxs) g.add_edges(bonds_end_idxs, bonds_begin_idxs) # g.edata["type"] = torch.Tensor(bonds_types)[:, None].repeat(2, 1) return g def from_rdkit_mol(mol, use_fp=True): import dgl # initialize graph g = dgl.DGLGraph() # enter nodes n_atoms = mol.GetNumAtoms() g.add_nodes(n_atoms) g.ndata["type"] = torch.Tensor( [[atom.GetAtomicNum()] for atom in mol.GetAtoms()] ) g.ndata["formal_charge"] = torch.Tensor( [[atom.GetFormalCharge()] for atom in mol.GetAtoms()] ) h_v = torch.zeros(g.ndata["type"].shape[0], 100, dtype=torch.float32) h_v[ torch.arange(g.ndata["type"].shape[0]), torch.squeeze(g.ndata["type"]).long(), ] = 1.0 h_v_fp = torch.stack([fp_rdkit(atom) for atom in mol.GetAtoms()], axis=0) if use_fp == True: h_v = torch.cat([h_v, h_v_fp], dim=-1) # (n_atoms, 117) g.ndata["h0"] = h_v # enter bonds bonds = list(mol.GetBonds()) bonds_begin_idxs = [bond.GetBeginAtomIdx() for bond in bonds] bonds_end_idxs = [bond.GetEndAtomIdx() for bond in bonds] bonds_types = [bond.GetBondType().real for bond in bonds] # NOTE: dgl edges are directional g.add_edges(bonds_begin_idxs, bonds_end_idxs) g.add_edges(bonds_end_idxs, bonds_begin_idxs) # g.edata["type"] = torch.Tensor(bonds_types)[:, None].repeat(2, 1) return g ================================================ FILE: espaloma/graphs/utils/regenerate_impropers.py ================================================ import dgl import numpy as np import torch from .offmol_indices import improper_torsion_indices from ..graph import Graph def regenerate_impropers(g: Graph, improper_def="smirnoff"): """ Method to regenerate the improper nodes according to the specified method of permuting the impropers. Modifies the esp.Graph's heterograph in place and returns the new heterograph. NOTE: This will clear the data on all n4_improper nodes, including previously generated improper from JanossyPoolingImproper. 
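    Examples
    --------
    A short sketch::

        >>> import espaloma as esp
        >>> g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")
        >>> g = regenerate_impropers(g, improper_def="smirnoff")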
    ## First get rid of the old nodes/edges
    hg = g.heterograph
    hg = dgl.remove_nodes(hg, hg.nodes("n4_improper"), "n4_improper")

    ## Generate new improper torsion permutations
    idxs = improper_torsion_indices(g.mol, improper_def)
    if len(idxs) == 0:
        return g

    ## Add new nodes of type n4_improper (one for each permut)
    hg = dgl.add_nodes(hg, idxs.shape[0], ntype="n4_improper")

    ## New edges b/n improper permuts and n1 nodes
    permut_ids = np.arange(idxs.shape[0])
    for i in range(4):
        n1_ids = idxs[:, i]

        # edge from improper node to n1 node
        outgoing_etype = ("n4_improper", f"n4_improper_has_{i}_n1", "n1")
        hg = dgl.add_edges(hg, permut_ids, n1_ids, etype=outgoing_etype)

        # edge from n1 to improper
        incoming_etype = ("n1", f"n1_as_{i}_in_n4_improper", "n4_improper")
        hg = dgl.add_edges(hg, n1_ids, permut_ids, etype=incoming_etype)

    ## New edges b/n improper permuts and the graph (for global pooling)
    # edge from improper node to graph
    outgoing_etype = ("n4_improper", f"n4_improper_in_g", "g")
    hg = dgl.add_edges(
        hg, permut_ids, np.zeros_like(permut_ids), etype=outgoing_etype
    )

    # edge from graph to improper nodes
    incoming_etype = ("g", "g_has_n4_improper", "n4_improper")
    hg = dgl.add_edges(
        hg, np.zeros_like(permut_ids), permut_ids, etype=incoming_etype
    )

    hg.nodes["n4_improper"].data["idxs"] = torch.tensor(idxs)
    g.heterograph = hg
    return g  # hg


================================================
FILE: espaloma/metrics.py
================================================
"""
Metrics to evaluate and train models.
"""
import abc

# =============================================================================
# IMPORTS
# =============================================================================
import torch
import numpy as np

from .units import GAS_CONSTANT


# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def center(metric, dim=1, reduction="none"):
    def _centered(input, target, metric=metric, dim=dim):
        # center input
        input = input - input.mean(dim=dim, keepdim=True)

        # center target
        target = target - target.mean(dim=dim, keepdim=True)

        if reduction == "none":
            return metric(input, target)
        else:
            return getattr(torch, reduction)(metric(input, target))

    return _centered


def boltzmann_weighted(metric, reduction="mean", temperature=300.0):
    def _weighted(input, target, metric=metric, reduction=reduction):
        _loss = metric(input, target)
        min_target, _ = torch.min(target, dim=-1, keepdims=True)
        delta_target = target - min_target
        weight_target = torch.softmax(
            -delta_target / (GAS_CONSTANT * temperature),
            dim=-1,
        )
        _loss = _loss * weight_target
        return getattr(torch, reduction)(_loss)

    return _weighted


def std(metric, weight=1.0, dim=1):
    def _std(input, target, metric=metric, weight=weight, dim=dim):
        return weight * metric(input, target).std(dim=dim).sum()

    return _std


def weighted(metric, weight, reduction="mean"):
    def _weighted(
        input, target, metric=metric, weight=weight, reduction=reduction
    ):
        _loss = metric(input, target)
        # broadcast the per-sample weight across the remaining loss
        # dimensions, then reduce the weighted loss
        for _ in range(_loss.dim() - 1):
            weight = weight.unsqueeze(-1)
        return getattr(torch, reduction)(weight * _loss)

    return _weighted


def weighted_with_key(metric, key="weight", reduction="mean"):
    def _weighted(input, target, metric=metric, key=key, reduction=reduction):
        weight = target.nodes["g"].data[key].flatten()
        _loss = metric(input, target)
        # broadcast the per-graph weight across the remaining loss
        # dimensions, then reduce the weighted loss
        for _ in range(_loss.dim() - 1):
            weight = weight.unsqueeze(-1)
        return getattr(torch, reduction)(weight * _loss)

    return _weighted


def bootstrap(metric,
n_samples=100, ci=0.95): def _bootstrap(input, target, metric=metric, n_samples=n_samples, ci=ci): original = metric(input=input, target=target) idxs_all = np.arange(input.shape[0]) results = [] for _ in range(n_samples): idxs = np.random.choice( idxs_all, len(idxs_all), replace=True, ) _metric = ( metric(input=input[idxs], target=target[idxs]) .detach() .cpu() .numpy() .item() ) results.append( _metric, ) results = np.array(results) low = np.percentile(results, 100.0 * 0.5 * (1 - ci)) high = np.percentile(results, (1 - ((1 - ci) * 0.5)) * 100.0) return original.detach().cpu().numpy().item(), low, high return _bootstrap def latex_format_ci(original, low, high): return "$%.4f_{%.4f}^{%.4f}$" % (original, low, high) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def mse(input, target): return torch.nn.functional.mse_loss(target, input) def mape(input, target): return ((input - target).abs() / target.abs()).mean() def rmse(input, target): return torch.sqrt(torch.nn.functional.mse_loss(target, input)) def mae_of_log(input, target): return torch.nn.L1Loss()(torch.log(input), torch.log(target)) def cross_entropy(input, target, reduction="mean"): loss_fn = torch.nn.CrossEntropyLoss(reduction=reduction) return loss_fn(input=input, target=target) # prediction first, logit def r2(input, target): target = target.flatten() input = input.flatten() ss_tot = (target - target.mean()).pow(2).sum() ss_res = (input - target).pow(2).sum() return 1 - torch.div(ss_res, ss_tot) def accuracy(input, target): # check if this is logit if input.dim() == 2 and input.shape[-1] > 1: input = input.argmax(dim=-1) return torch.div((input == target).sum().double(), target.shape[0]) # ============================================================================= # MODULE CLASSES # ============================================================================= class Metric(torch.nn.modules.loss._Loss): """Base function for loss.""" def __init__(self, size_average=None, reduce=None, reduction="mean"): super(Metric, self).__init__(size_average, reduce, reduction) @abc.abstractmethod def forward(self, *args, **kwargs): raise NotImplementedError class GraphMetric(Metric): """Loss between nodes attributes of graph or graphs. Parameters ---------- base_metric : callable Metric on fixed dimensional space. between : List[str] Names of quantities to compare. level : str Level of nodes to compare with. 
Returns ------- torch.Tensor """ def __init__(self, base_metric, between, level="n1", *args, **kwargs): super(GraphMetric, self).__init__(*args, **kwargs) # between could be tuple of two strings or two functions assert len(between) == 2 self.between = ( self._translation(between[0], level), self._translation(between[1], level), ) self.base_metric = base_metric # get name if hasattr(base_metric, "__name__"): base_name = base_metric.__name__ else: base_name = base_metric.__class__.__name__ self.__name__ = "%s_between_%s_and_%s_on_%s" % ( base_name, between[0], between[1], level, ) @staticmethod def _translation(string, level): return lambda g: g.nodes[level].data[string] def forward(self, g_input, g_target=None): """Forward function of loss.""" # allow loss within self if g_target is None: g_target = g_input # get input and output transform function input_fn, target_fn = self.between # compute loss using base loss # NOTE: # use keyward argument here since torch is bad with the order with args return self.base_metric( input=input_fn(g_input), target=target_fn(g_target) ) class GraphDerivativeMetric(Metric): """Loss between nodes attributes of graph or graphs.""" def __init__( self, base_metric, between, weight=1.0, level="n1", d="xyz", d_level="n1", *args, **kwargs ): super(GraphDerivativeMetric, self).__init__(*args, **kwargs) # between could be tuple of two strings or two functions assert len(between) == 2 self.between = ( self._translation(between[0], level), self._translation(between[1], level), ) self.d = self._translation(d, d_level) self.base_metric = base_metric self.weight = weight # get name if hasattr(base_metric, "__name__"): base_name = base_metric.__name__ else: base_name = base_metric.__class__.__name__ self.__name__ = "%s_between_d_%s_d_%s_and_d_%s_d_%s_on_%s" % ( base_name, between[0], d, between[1], d, level, ) @staticmethod def _translation(string, level): return lambda g: g.nodes[level].data[string] def forward(self, g_input, g_target=None): """Forward function of loss.""" # allow loss within self if g_target is None: g_target = g_input # get input and output transform function input_fn, target_fn = self.between # calculate the derivatives of input input_prime = torch.autograd.grad( input_fn(g_input).sum(), self.d(g_input), create_graph=True, retain_graph=True, allow_unused=True, )[0] target_prime = torch.autograd.grad( target_fn(g_target).sum(), self.d(g_target), create_graph=True, retain_graph=True, allow_unused=True, )[0] # compute loss using base loss # NOTE: # use keyward argument here since torch is bad with the order with args return self.weight * self.base_metric( input=input_prime, target=target_prime, ) class GraphHalfDerivativeMetric(Metric): """Loss between nodes attributes of graph or graphs.""" def __init__( self, base_metric, input_level="g", input_name="u", target_prime_level="n1", target_prime_name="u_ref_prime", d="xyz", d_level="n1", weight=1.0, *args, **kwargs ): super(GraphHalfDerivativeMetric, self).__init__(*args, **kwargs) # define query functions self.d = self._translation(d, d_level) self.input_fn = self._translation(input_name, input_level) self.target_prime_fn = self._translation( target_prime_name, target_prime_level ) self.base_metric = base_metric self.weight = weight # get name if hasattr(base_metric, "__name__"): base_name = base_metric.__name__ else: base_name = base_metric.__class__.__name__ self.__name__ = "%s_between_%s_d_%s_on_%s_and_%s_on_%s" % ( base_name, input_name, d, input_level, target_prime_name, target_prime_level, ) 
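    # usage sketch (illustrative; these are the constructor defaults written
    # out explicitly): penalize the mismatch between the gradient of the
    # predicted graph-level energy "u" w.r.t. atomic coordinates "xyz" and
    # reference derivatives stored on atoms as "u_ref_prime":
    #
    #     loss_fn = GraphHalfDerivativeMetric(
    #         base_metric=torch.nn.MSELoss(),
    #         input_level="g",
    #         input_name="u",
    #         target_prime_level="n1",
    #         target_prime_name="u_ref_prime",
    #     )
    #     loss = loss_fn(g)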
@staticmethod def _translation(string, level): return lambda g: g.nodes[level].data[string] def forward(self, g_input, g_target=None): """Forward function of loss.""" # allow loss within self if g_target is None: g_target = g_input # calculate the derivatives of input input_prime = torch.autograd.grad( self.input_fn(g_input).sum(), self.d(g_input), create_graph=True, retain_graph=True, )[0] target_prime = self.target_prime_fn(g_target) # compute loss using base loss # NOTE: # use keyword arguments here since relying on positional order is error-prone return self.weight * self.base_metric( input=input_prime, target=target_prime, ) # ============================================================================= # PRESETS # ============================================================================= class TypingCrossEntropy(GraphMetric): def __init__(self): super(TypingCrossEntropy, self).__init__( base_metric=torch.nn.CrossEntropyLoss(), between=["nn_typing", "legacy_typing"], ) self.__name__ = "TypingCrossEntropy" class TypingAccuracy(GraphMetric): def __init__(self): super(TypingAccuracy, self).__init__( base_metric=accuracy, between=["nn_typing", "legacy_typing"] ) self.__name__ = "TypingAccuracy" class BondKMSE(GraphMetric): def __init__(self): super(BondKMSE, self).__init__( between=["k_ref", "k"], level="n2", base_metric=torch.nn.MSELoss() ) self.__name__ = "BondKMSE" class BondKRMSE(GraphMetric): def __init__(self): super(BondKRMSE, self).__init__( between=["k_ref", "k"], level="n2", base_metric=rmse ) self.__name__ = "BondKRMSE" ================================================ FILE: espaloma/mm/__init__.py ================================================ from . import angle, bond, energy, functional, geometry, nonbonded, torsion ================================================ FILE: espaloma/mm/angle.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def harmonic_angle(x, k, eq): """Harmonic angle energy. Parameters ---------- x : `torch.Tensor`, `shape = (batch_size, 1)` angle value k : `torch.Tensor`, `shape = (batch_size, 1)` force constant eq : `torch.Tensor`, `shape = (batch_size, 1)` equilibrium angle Returns ------- u : `torch.Tensor`, `shape = (batch_size, 1)` energy """ # NOTE: # the factor 0.5 here combines with the 0.5 inside # esp.mm.functional.harmonic to give 0.25 per hypernode; # since every angle is enumerated twice in the graph, the # summed energy recovers the conventional 0.5 * k * (x - eq) ** 2 return 0.5 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) def linear_mixture_angle(x, coefficients, phases): """Angle energy with linear-mixture basis function. Parameters ---------- coefficients : torch.Tensor Coefficients of the linear mixture. phases : torch.Tensor Phases of the linear mixture.
""" return 0.5 * esp.mm.functional.linear_mixture( x=x, coefficients=coefficients, phases=phases ) def urey_bradley(x_between, coefficients, phases): return esp.mm.functional.linear_mixture( x=x_between, coefficients=coefficients, phases=phases, ) def bond_bond(u_left, u_right, k_bond_bond): u_left = u_left - u_left.min(dim=-1, keepdims=True)[0] u_right = u_right - u_right.min(dim=-1, keepdims=True)[0] return k_bond_bond * (u_left**0.5) * (u_right**0.5) def bond_angle( u_left, u_right, u_angle, k_bond_angle, ): u_left = u_left - u_left.min(dim=-1, keepdims=True)[0] u_right = u_right - u_right.min(dim=-1, keepdims=True)[0] u_angle = u_angle - u_angle.min(dim=-1, keepdims=True)[0] return k_bond_angle * (u_left**0.5) * ( u_angle**0.5 ) + k_bond_angle * (u_right**0.5) * (u_angle**0.5) def angle_high( u_angle, k3, k4, ): u_angle = u_angle - u_angle.min(dim=-1, keepdims=True)[0] return k3 * u_angle**1.5 + k4 * u_angle**2 ================================================ FILE: espaloma/mm/bond.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def harmonic_bond(x, k, eq): """Harmonic bond energy. Parameters ---------- x : `torch.Tensor`, `shape = (batch_size, 1)` bond length k : `torch.Tensor`, `shape = (batch_size, 1)` force constant eq : `torch.Tensor`, `shape = (batch_size, 1)` equilibrium bond length Returns ------- u : `torch.Tensor`, `shape = (batch_size, 1)` energy """ # NOTE: # the constant is included here but not in the functional forms # NOTE: # 0.25 because all bonds are calculated twice return 0.5 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) def gaussian_bond(x, coefficients): """Bond energy with Gaussian basis function.""" return esp.mm.functional.gaussian( x=x, coefficients=coefficients, ) def linear_mixture_bond(x, coefficients, phases): """Bond energy with Linear basis function. Parameters ---------- coefficients : torch.Tensor Coefficients of the linear mixuture. phases : torch.Tensor Phases of the linear mixture. 
""" return 0.5 * esp.mm.functional.linear_mixture( x=x, coefficients=coefficients, phases=phases ) def bond_high(u_bond, k3, k4): u_bond = u_bond - u_bond.min(dim=-1, keepdims=True)[0] return k3 * u_bond**1.5 + k4 * u_bond**2 ================================================ FILE: espaloma/mm/energy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch import espaloma as esp # ============================================================================= # ENERGY IN HYPERNODES---BONDED # ============================================================================= def apply_bond(nodes, suffix=""): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.bond.harmonic_bond( x=nodes.data["x"], k=nodes.data["k%s" % suffix], eq=nodes.data["eq%s" % suffix], ) } # else: # return { # 'u%s' % suffix: esp.mm.bond.harmonic_bond_re( # x=nodes.data['x'], # k=nodes.data['k%s' % suffix], # eq=nodes.data['eq%s' % suffix], # ) # } def apply_angle(nodes, suffix=""): """Angle energy in nodes.""" return { "u%s" % suffix: esp.mm.angle.harmonic_angle( x=nodes.data["x"], k=nodes.data["k%s" % suffix], eq=nodes.data["eq%s" % suffix], ) } def apply_angle_ii(nodes, suffix=""): return { # "u_angle_high%s" # % suffix: esp.mm.angle.angle_high( # u_angle=nodes.data["u"], # k3=nodes.data["k3"], # k4=nodes.data["k4"], # ), "u_urey_bradley%s" % suffix: esp.mm.angle.urey_bradley( x_between=nodes.data["x_between"], coefficients=nodes.data["coefficients_urey_bradley"], phases=[0.0, 12.0], ), "u_bond_bond%s" % suffix: esp.mm.angle.bond_bond( u_left=nodes.data["u_left"], u_right=nodes.data["u_right"], k_bond_bond=nodes.data["k_bond_bond"], ), "u_bond_angle%s" % suffix: esp.mm.angle.bond_angle( u_left=nodes.data["u_left"], u_right=nodes.data["u_right"], u_angle=nodes.data["u"], k_bond_angle=nodes.data["k_bond_angle"], ), } def apply_bond_ii(nodes, suffix=""): return { "u_bond_high%s" % suffix: esp.mm.bond.bond_high( u_bond=nodes.data["u"], k3=nodes.data["k3"], k4=nodes.data["k4"], ) } def apply_torsion_ii(nodes, suffix=""): """Torsion energy in nodes.""" return { "u_angle_angle%s" % suffix: esp.mm.torsion.angle_angle( u_angle_left=nodes.data["u_angle_left"], u_angle_right=nodes.data["u_angle_right"], k_angle_angle=nodes.data["k_angle_angle"], ), "u_angle_torsion%s" % suffix: esp.mm.torsion.angle_torsion( u_angle_left=nodes.data["u_angle_left"], u_angle_right=nodes.data["u_angle_right"], u_torsion=nodes.data["u"], k_angle_torsion=nodes.data["k_angle_torsion"], ), "u_angle_angle_torsion%s" % suffix: esp.mm.torsion.angle_angle_torsion( u_angle_left=nodes.data["u_angle_left"], u_angle_right=nodes.data["u_angle_right"], u_torsion=nodes.data["u"], k_angle_angle_torsion=nodes.data["k_angle_angle_torsion"], ), "u_bond_torsion%s" % suffix: esp.mm.torsion.bond_torsion( u_bond_left=nodes.data["u_bond_left"], u_bond_right=nodes.data["u_bond_right"], u_bond_center=nodes.data["u_bond_center"], u_torsion=nodes.data["u"], k_side_torsion=nodes.data["k_side_torsion"], k_center_torsion=nodes.data["k_center_torsion"], ), } def apply_torsion(nodes, suffix=""): """Torsion energy in nodes.""" if ( "phases%s" % suffix in nodes.data and "periodicity%s" % suffix in nodes.data ): return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], phases=nodes.data["phases%s" % suffix], 
periodicity=nodes.data["periodicity%s" % suffix], ) } else: return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], ) } def apply_improper_torsion(nodes, suffix=""): """Improper torsion energy in nodes.""" if ( "phases%s" % suffix in nodes.data and "periodicity%s" % suffix in nodes.data ): return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], phases=nodes.data["phases%s" % suffix], periodicity=nodes.data["periodicity%s" % suffix], ) } else: n_multi = nodes.data["k%s" % suffix].shape[-1] periodicity=list(range(1, n_multi+1)) phases=[0.0 for _ in range(n_multi)] return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], phases=phases, periodicity=periodicity, ) } def apply_bond_gaussian(nodes, suffix=""): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.bond.gaussian_bond( x=nodes.data["x"], coefficients=nodes.data["coefficients%s" % suffix], ) } def apply_bond_linear_mixture(nodes, suffix="", phases=[0.0, 1.0]): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.bond.linear_mixture_bond( x=nodes.data["x"], coefficients=nodes.data["coefficients%s" % suffix], phases=phases, ) } def apply_angle_linear_mixture(nodes, suffix="", phases=[0.0, 1.0]): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.angle.linear_mixture_angle( x=nodes.data["x"], coefficients=nodes.data["coefficients%s" % suffix], phases=phases, ) } # ============================================================================= # ENERGY IN HYPERNODES---NONBONDED # ============================================================================= def apply_nonbonded(nodes, scaling=1.0, suffix=""): """Nonbonded in nodes.""" # TODO: should this be 9-6 or 12-6? return { "u%s" % suffix: scaling * esp.mm.nonbonded.lj_12_6( x=nodes.data["x"], sigma=nodes.data["sigma%s" % suffix], epsilon=nodes.data["epsilon%s" % suffix], ) } def apply_coulomb(nodes, scaling=1.0, suffix=""): return { "u%s" % suffix: scaling * esp.mm.nonbonded.coulomb( x=nodes.data["x"], q=nodes.data["q"], ) } # ============================================================================= # ENERGY IN GRAPH # ============================================================================= def energy_in_graph( g, suffix="", terms=["n2", "n3", "n4"], ): # "onefour", "nonbonded"]): """Calculate the energy of a small molecule given parameters and geometry. Parameters ---------- g : `dgl.DGLHeteroGraph` Input graph. Returns ------- g : `dgl.DGLHeteroGraph` Output graph. Notes ----- This function modifies graphs in-place. 
""" # TODO: this is all very restricted for now # we need to make this better import dgl if "n2" in terms: # apply energy function if "coefficients%s" % suffix in g.nodes["n2"].data: g.apply_nodes( lambda node: apply_bond_linear_mixture( node, suffix=suffix, phases=[1.5, 6.0] ), ntype="n2", ) else: g.apply_nodes( lambda node: apply_bond(node, suffix=suffix), ntype="n2", ) if "n3" in terms: if "coefficients%s" % suffix in g.nodes["n3"].data: import math g.apply_nodes( lambda node: apply_angle_linear_mixture( node, suffix=suffix, phases=[0.0, math.pi] ), ntype="n3", ) else: g.apply_nodes( lambda node: apply_angle(node, suffix=suffix), ntype="n3", ) if g.number_of_nodes("n4") > 0 and "n4" in terms: g.apply_nodes( lambda node: apply_torsion(node, suffix=suffix), ntype="n4", ) if g.number_of_nodes("n4_improper") > 0 and "n4_improper" in terms: g.apply_nodes( lambda node: apply_improper_torsion(node, suffix=suffix), ntype="n4_improper", ) # if g.number_of_nodes("nonbonded") > 0 and "nonbonded" in terms: # g.apply_nodes( # lambda node: apply_nonbonded(node, suffix=suffix), # ntype="nonbonded", # ) # if g.number_of_nodes("onefour") > 0 and "onefour" in terms: # g.apply_nodes( # lambda node: apply_nonbonded( # node, # suffix=suffix, # scaling=0.5, # ), # ntype="onefour", # ) if "nonbonded" in terms or "onefour" in terms: esp.mm.nonbonded.multiply_charges(g) if "nonbonded" in terms and g.number_of_nodes("nonbonded") > 0: g.apply_nodes( lambda node: apply_coulomb( node, suffix=suffix, scaling=1.0, ), ntype="nonbonded", ) if "onefour" in terms and g.number_of_nodes("onefour") > 0: g.apply_nodes( lambda node: apply_coulomb( node, suffix=suffix, # scaling=0.5, scaling=0.8333333333333334, ), ntype="onefour", ) # sum up energy # bonded g.multi_update_all( { "%s_in_g" % term: ( dgl.function.copy_u(u="u%s" % suffix, out="m_%s" % term), dgl.function.sum( msg="m_%s" % term, out="u_%s%s" % (term, suffix) ), ) for term in terms if "u%s" % suffix in g.nodes[term].data }, cross_reducer="sum", ) g.apply_nodes( lambda node: { "u%s" % suffix: sum( node.data["u_%s%s" % (term, suffix)] for term in terms if "u_%s%s" % (term, suffix) in node.data ) }, ntype="g", ) if "u0" in g.nodes["g"].data: g.apply_nodes( lambda node: {"u": node.data["u"] + node.data["u0"]}, ntype="g", ) return g def energy_in_graph_ii( g, suffix="", ): if g.number_of_nodes("n3") > 0: g.apply_nodes( lambda node: apply_angle_ii(node, suffix=suffix), ntype="n3", ) g.apply_nodes( lambda node: { "u%s" % suffix: node.data["u%s" % suffix] + node.data["u_urey_bradley%s" % suffix] + node.data["u_bond_bond%s" % suffix] + node.data["u_bond_angle%s" % suffix] }, ntype="n3", ) if g.number_of_nodes("n4") > 0: g.apply_nodes( lambda node: apply_torsion_ii(node, suffix=suffix), ntype="n4", ) g.apply_nodes( lambda node: { "u%s" % suffix: node.data["u%s" % suffix] + node.data["u_angle_angle%s" % suffix] + node.data["u_angle_torsion%s" % suffix] + node.data["u_angle_angle_torsion%s" % suffix] + node.data["u_bond_torsion%s" % suffix] }, ntype="n4", ) return g class EnergyInGraph(torch.nn.Module): def __init__(self, *args, **kwargs): super(EnergyInGraph, self).__init__() self.args = args self.kwargs = kwargs def forward(self, g): return energy_in_graph(g, *self.args, **self.kwargs) class EnergyInGraphII(torch.nn.Module): def __init__(self, *args, **kwargs): super(EnergyInGraphII, self).__init__() self.args = args self.kwargs = kwargs def forward(self, g): return energy_in_graph_ii(g, *self.args, **self.kwargs) class CarryII(torch.nn.Module): def forward(self, g): import 
math import dgl g.multi_update_all( { "n2_as_0_in_n3": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_left"), ), "n2_as_1_in_n3": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_right"), ), "n2_as_0_in_n4": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_bond_left"), ), "n2_as_1_in_n4": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_bond_center"), ), "n2_as_2_in_n4": ( dgl.function.copy_u("u", "m_u_2"), dgl.function.sum("m_u_2", "u_bond_right"), ), "n3_as_0_in_n4": ( dgl.function.copy_u("u", "m3_u_0"), dgl.function.sum("m3_u_0", "u_angle_left"), ), "n3_as_1_in_n4": ( dgl.function.copy_u("u", "m3_u_1"), dgl.function.sum("m3_u_1", "u_angle_right"), ), }, cross_reducer="sum", ) return g ================================================ FILE: espaloma/mm/functional.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import math import torch import espaloma as esp # ============================================================================= # CONSTANTS # ============================================================================= from openmm import unit from openmm.unit import Quantity LJ_SWITCH = Quantity(1.0, unit.angstrom).value_in_unit( esp.units.DISTANCE_UNIT ) # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def linear_mixture_to_original(k1, k2, b1, b2): """Translating linear mixture coefficients back to original parameterization. """ # (batch_size, ) k = k1 + k2 # (batch_size, ) b = (k1 * b1 + k2 * b2) / (k + 1e-7) return k, b # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def harmonic(x, k, eq, order=[2]): """Harmonic term. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` k : `torch.Tensor`, `shape=(batch_size, len(order))` eq : `torch.Tensor`, `shape=(batch_size, len(order))` order : `int` or `List` of `int` Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` """ if isinstance(order, list): order = torch.tensor(order, device=x.device) return ( 0.5 * k * ((x - eq)).pow(order[:, None, None]).permute(1, 2, 0).sum(dim=-1) ) def periodic_fixed_phases( dihedrals: torch.Tensor, ks: torch.Tensor ) -> torch.Tensor: """Periodic torsion term with n_phases = 6, periodicities = 1..n_phases, phases = zeros Parameters ---------- dihedrals : torch.Tensor, shape=(n_snapshots, n_dihedrals) dihedral angles -- TODO: confirm in radians? ks : torch.Tensor, shape=(n_dihedrals, n_phases) force constants -- TODO: confirm in esp.unit.ENERGY_UNIT ? Returns ------- u : torch.Tensor, shape=(n_snapshots, 1) potential energy of each snapshot Notes ----- TODO: is there a way to annotate / type-hint tensor shapes? 
(currently adding many assert statements) TODO: merge with esp.mm.functional.periodic -- adding this because I was having difficulty debugging runtime tensor shape errors in esp.mm.functional.periodic, which allows for a more flexible mix of input shapes and types """ # periodicity = 1..n_phases n_phases = 6 periodicity = torch.arange(n_phases) + 1 # assert input shape consistency n_snapshots, n_dihedrals = dihedrals.shape n_dihedrals_, n_phases_ = ks.shape assert n_dihedrals == n_dihedrals_ assert n_phases == n_phases_ # promote everything to this shape stacked_shape = (n_snapshots, n_dihedrals, n_phases) # duplicate ks n_snapshots times ks_stacked = torch.stack([ks] * n_snapshots, dim=0) assert ks_stacked.shape == stacked_shape # duplicate dihedral angles n_phases times dihedrals_stacked = torch.stack([dihedrals] * n_phases, dim=2) assert dihedrals_stacked.shape == stacked_shape # duplicate periodicity n_snapshots * n_dihedrals times ns = torch.stack( [torch.stack([periodicity] * n_snapshots)] * n_dihedrals, dim=1 ) assert ns.shape == stacked_shape # compute k_n * cos(n * theta) for n in 1..n_phases, for each dihedral in each snapshot energy_terms = ks_stacked * torch.cos(ns * dihedrals_stacked) assert energy_terms.shape == stacked_shape # sum over n_dihedrals and n_phases energy_sums = energy_terms.sum(dim=(1, 2)) assert energy_sums.shape == (n_snapshots,) return energy_sums.reshape((n_snapshots, 1)) def periodic( x, k, periodicity=list(range(1, 7)), phases=[0.0 for _ in range(6)] ): """Periodic term. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` k : `torch.Tensor`, `shape=(batch_size, number_of_phases)` periodicity: either list of length number_of_phases, or `torch.Tensor`, `shape=(batch_size, number_of_phases)` phases : either list of length number_of_phases, or `torch.Tensor`, `shape=(batch_size, number_of_phases)` """ if isinstance(phases, list): phases = torch.tensor(phases, device=x.device) if isinstance(periodicity, list): periodicity = torch.tensor( periodicity, device=x.device, dtype=torch.get_default_dtype(), ) if periodicity.ndim == 1: periodicity = periodicity[None, None, :].repeat( x.shape[0], x.shape[1], 1 ) elif periodicity.ndim == 2: periodicity = periodicity[:, None, :].repeat(1, x.shape[1], 1) if phases.ndim == 1: phases = phases[None, None, :].repeat( x.shape[0], x.shape[1], 1, ) elif phases.ndim == 2: phases = phases[:, None, :].repeat( 1, x.shape[1], 1, ) n_theta = periodicity * x[:, :, None] n_theta_minus_phases = n_theta - phases cos_n_theta_minus_phases = n_theta_minus_phases.cos() k = k[:, None, :].repeat(1, x.shape[1], 1) # energy = (k * (1.0 + cos_n_theta_minus_phases)).sum(dim=-1) energy = ( torch.nn.functional.relu(k) * (cos_n_theta_minus_phases + 1.0) - torch.nn.functional.relu(0.0 - k) * (cos_n_theta_minus_phases - 1.0) ).sum(dim=-1) return energy # simple implementation # def harmonic(x, k, eq): # return k * (x - eq) ** 2 # # def harmonic_re(x, k, eq, a=0.0, b=0.3): # # temporary # ka = k # kb = eq # # c = ((ka * a + kb * b) / (ka + kb)) ** 2 - a ** 2 - b ** 2 # # return ka * (x - a) ** 2 + kb * (x - b) ** 2 def lj( x, epsilon, sigma, order=[12, 6], coefficients=[1.0, 1.0], switch=LJ_SWITCH, ): r"""Lennard-Jones term. 
Notes ----- ..math:: E = \epsilon ((\sigma / r) ^ {12} - (\sigma / r) ^ 6) Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` epsilon : `torch.Tensor`, `shape=(batch_size, len(order))` sigma : `torch.Tensor`, `shape=(batch_size, len(order))` order : `int` or `List` of `int` coefficients : torch.tensor or list switch : unitless switch width (distance) Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` """ if isinstance(order, list): order = torch.tensor(order, device=x.device) if isinstance(coefficients, list): coefficients = torch.tensor(coefficients, device=x.device) assert order.shape[0] == 2 assert order.dim() == 1 # TODO: # for experiments only # erase later # compute sigma over x sigma_over_x = sigma / x # erase values under switch sigma_over_x = torch.where( torch.lt(x, switch), torch.zeros_like(sigma_over_x), sigma_over_x, ) return epsilon * ( coefficients[0] * sigma_over_x ** order[0] - coefficients[1] * sigma_over_x ** order[1] ) def gaussian(x, coefficients, phases=[idx * 0.001 for idx in range(200)]): r"""Gaussian basis function. Parameters ---------- x : torch.Tensor coefficients : list or torch.Tensor of length n_phases phases : list or torch.Tensor of length n_phases """ if isinstance(phases, list): # (number_of_phases, ) phases = torch.tensor(phases, device=x.device) # broadcasting # (number_of_hypernodes, number_of_snapshots, number_of_phases) phases = phases[None, None, :].repeat(x.shape[0], x.shape[1], 1) x = x[:, :, None].repeat(1, 1, phases.shape[-1]) coefficients = coefficients[:, None, :].repeat(1, x.shape[1], 1) return (coefficients * torch.exp(-0.5 * (x - phases) ** 2)).sum(-1) def linear_mixture(x, coefficients, phases=[0.0, 1.0]): r"""Linear mixture basis function. x : torch.Tensor coefficients : list or torch.Tensor of length 2 phases : list of length 2 """ assert len(phases) == 2, "Only two phases now." 
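# NOTE (added commentary): the two phases b1, b2 below act as fixed reference
# points, and the (typically learned) coefficients k1, k2 weight a harmonic
# well centered at each; up to an additive constant the sum is itself a
# harmonic with force constant k1 + k2 and equilibrium
# (k1 * b1 + k2 * b2) / (k1 + k2), which is what linear_mixture_to_original
# above recovers.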
assert coefficients.shape[-1] == 2 # partition the dimensions # (, ) b1 = phases[0] b2 = phases[1] # (batch_size, 1) k1 = coefficients[:, 0][:, None] k2 = coefficients[:, 1][:, None] # get the original parameters # (batch_size, ) # k, b = linear_mixture_to_original(k1, k2, b1, b2) # (batch_size, 1) u1 = k1 * (x - b1) ** 2 u2 = k2 * (x - b2) ** 2 u = 0.5 * (u1 + u2) # - k1 * b1 ** 2 - k2 * b2 ** 2 + b ** 2 return u def harmonic_periodic_coupled( x_harmonic, x_periodic, k, eq, periodicity=list(range(1, 3)), ): if isinstance(periodicity, list): periodicity = torch.tensor( periodicity, device=x_harmonic.device, dtype=torch.get_default_dtype(), ) n_theta = ( periodicity[None, None, :].repeat( x_periodic.shape[0], x_periodic.shape[1], 1 ) * x_periodic[:, :, None] ) cos_n_theta = n_theta.cos() k = k[:, None, :].repeat(1, x_periodic.shape[1], 1) sum_k_cos_n_theta = (k * cos_n_theta).sum(dim=-1) x_minus_eq = x_harmonic - eq energy = x_minus_eq * sum_k_cos_n_theta return energy def harmonic_harmonic_coupled( x0, x1, eq0, eq1, k, ): energy = k * (x0 - eq0) * (x1 - eq1) return energy def harmonic_harmonic_periodic_coupled( theta0, theta1, eq0, eq1, phi, k, ): energy = k * (theta0 - eq0) * (theta1 - eq1) * phi.cos() return energy ================================================ FILE: espaloma/mm/geometry.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def reduce_stack(msg, out): """Copy message and stack.""" def _reduce_stack(nodes, msg=msg, out=out): return {out: nodes.mailbox[msg]} return _reduce_stack def copy_src(src, out): """Copy source of an edge.""" def _copy_src(edges, src=src, out=out): return {out: edges.src[src].clone()} return _copy_src # ============================================================================= # SINGLE GEOMETRY ENTITY # ============================================================================= def distance(x0, x1): """Distance.""" return torch.norm(x0 - x1, p=2, dim=-1) def _angle(r0, r1): """Angle between vectors.""" angle = torch.atan2( torch.norm(torch.cross(r0, r1), p=2, dim=-1), torch.sum(torch.mul(r0, r1), dim=-1), ) return angle def angle(x0, x1, x2): """Angle between three points.""" left = x1 - x0 right = x1 - x2 return _angle(left, right) def _dihedral(r0, r1): """Dihedral between normal vectors.""" return _angle(r0, r1) def dihedral( x0: torch.Tensor, x1: torch.Tensor, x2: torch.Tensor, x3: torch.Tensor ) -> torch.Tensor: """Dihedral between four points.
Reference --------- Closely follows implementation in Yutong Zhao's timemachine: https://github.com/proteneer/timemachine/blob/1a0ab45e605dc1e28c44ea90f38cb0dedce5c4db/timemachine/potentials/bonded.py#L152-L199 """ # check input shapes assert x0.shape == x1.shape == x2.shape == x3.shape # compute displacements 0->1, 2->1, 2->3 r01 = x1 - x0 + torch.randn_like(x0) * 1e-5 r21 = x1 - x2 + torch.randn_like(x0) * 1e-5 r23 = x3 - x2 + torch.randn_like(x0) * 1e-5 # compute normal planes n1 = torch.cross(r01, r21) n2 = torch.cross(r21, r23) rkj_normed = r21 / torch.norm(r21, dim=-1, keepdim=True) y = torch.sum(torch.mul(torch.cross(n1, n2), rkj_normed), dim=-1) x = torch.sum(torch.mul(n1, n2), dim=-1) # choose quadrant correctly theta = torch.atan2(y, x) return theta # ============================================================================= # GEOMETRY IN HYPERNODES # ============================================================================= def apply_bond(nodes): """Bond length in nodes.""" return {"x": distance(x0=nodes.data["xyz0"], x1=nodes.data["xyz1"])} def apply_angle(nodes): """Angle values in nodes.""" return { "x": angle( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], ), "x_left": distance( x0=nodes.data["xyz1"], x1=nodes.data["xyz0"], ), "x_right": distance( x0=nodes.data["xyz1"], x1=nodes.data["xyz2"], ), "x_between": distance( x0=nodes.data["xyz0"], x1=nodes.data["xyz2"], ), } def apply_torsion(nodes): """Torsion dihedrals in nodes.""" return { "x": dihedral( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], x3=nodes.data["xyz3"], ), "x_bond_left": distance( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], ), "x_bond_center": distance( x0=nodes.data["xyz1"], x1=nodes.data["xyz2"], ), "x_bond_right": distance( x0=nodes.data["xyz2"], x1=nodes.data["xyz3"], ), "x_angle_left": angle( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], ), "x_angle_right": angle( x0=nodes.data["xyz1"], x1=nodes.data["xyz2"], x2=nodes.data["xyz3"], ), } # ============================================================================= # GEOMETRY IN GRAPH # ============================================================================= # NOTE: # The following functions modify graphs in-place. def geometry_in_graph(g): """Assign values to geometric entities in graphs. Parameters ---------- g : `dgl.DGLHeteroGraph` Input graph. Returns ------- g : `dgl.DGLHeteroGraph` Output graph. Notes ----- This function modifies graphs in-place. """ import dgl # Copy coordinates to higher-order nodes. 
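# NOTE (added commentary): each edge type below ("n1_as_<pos>_in_<term>")
# connects an atom to the hypernodes that reference it at position <pos>;
# the copy-then-sum pattern simply transfers atom coordinates onto bonds
# ("n2"), angles ("n3"), torsions ("n4", "n4_improper"), and the
# nonbonded/onefour pairs, since each position receives exactly one message.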
g.multi_update_all( { **{ "n1_as_%s_in_n%s" % (pos_idx, big_idx): ( dgl.function.copy_u(u="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), ) for big_idx in range(2, 5) for pos_idx in range(big_idx) }, **{ "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), ) for term in ["nonbonded", "onefour"] for pos_idx in [0, 1] }, **{ "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), ) for term in ["n4_improper"] for pos_idx in [0, 1, 2, 3] }, }, cross_reducer="sum", ) # apply geometry functions g.apply_nodes(apply_bond, ntype="n2") g.apply_nodes(apply_angle, ntype="n3") if g.number_of_nodes("n4") > 0: g.apply_nodes(apply_torsion, ntype="n4") # copy coordinates to nonbonded if g.number_of_nodes("nonbonded") > 0: g.apply_nodes(apply_bond, ntype="nonbonded") if g.number_of_nodes("onefour") > 0: g.apply_nodes(apply_bond, ntype="onefour") if g.number_of_nodes("n4_improper") > 0: g.apply_nodes(apply_torsion, ntype="n4_improper") return g class GeometryInGraph(torch.nn.Module): def __init__(self, *args, **kwargs): super(GeometryInGraph, self).__init__() self.args = args self.kwargs = kwargs def forward(self, g): return geometry_in_graph(g, *self.args, **self.kwargs) ================================================ FILE: espaloma/mm/nonbonded.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # CONSTANTS # ============================================================================= import espaloma as esp from openmm import unit # CODATA 2018 # ref https://en.wikipedia.org/wiki/Coulomb_constant # Coulomb constant K_E = ( 8.9875517923 * 1e9 * unit.newton * unit.meter**2 * unit.coulomb ** (-2) * esp.units.PARTICLE ** (-1) ).value_in_unit(esp.units.COULOMB_CONSTANT_UNIT) # ============================================================================= # UTILITY FUNCTIONS FOR COMBINATION RULES FOR NONBONDED # ============================================================================= def geometric_mean(msg="m", out="epsilon"): def _geometric_mean(nodes): return {out: torch.prod(nodes.mailbox[msg], dim=1).pow(0.5)} return _geometric_mean def arithmetic_mean(msg="m", out="sigma"): def _arithmetic_mean(nodes): return {out: torch.sum(nodes.mailbox[msg], dim=1).mul(0.5)} return _arithmetic_mean # ============================================================================= # COMBINATION RULES FOR NONBONDED # ============================================================================= def lorentz_berthelot(g, suffix=""): import dgl g.multi_update_all( { "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u( u="epsilon%s" % suffix, out="m_epsilon" ), geometric_mean(msg="m_epsilon", out="epsilon%s" % suffix), ) for pos_idx in [0, 1] for term in ["nonbonded", "onefour"] }, cross_reducer="sum", ) g.multi_update_all( { "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="sigma%s" % suffix, out="m_sigma"), arithmetic_mean(msg="m_sigma", out="sigma%s" % suffix), ) for pos_idx in [0, 1] for term in ["nonbonded", "onefour"] }, cross_reducer="sum", ) return g def multiply_charges(g, 
suffix=""): """Multiply the charges of atoms into nonbonded and onefour terms. Parameters ---------- g : dgl.HeteroGraph Input graph. Returns ------- dgl.HeteroGraph : The modified graph with charges. """ import dgl g.multi_update_all( { "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="q%s" % suffix, out="m_q"), dgl.function.sum(msg="m_q", out="_q") # lambda node: {"q%s" % suffix: node.mailbox["m_q"].prod(dim=1)} ) for pos_idx in [0, 1] for term in ["nonbonded", "onefour"] }, cross_reducer="stack", apply_node_func=lambda node: {"q": node.data["_q"].prod(dim=1)}, ) return g # ============================================================================= # ENERGY FUNCTIONS # ============================================================================= def lj_12_6(x, sigma, epsilon): """Lennard-Jones 12-6. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` sigma : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` epsilon : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` """ return esp.mm.functional.lj(x=x, sigma=sigma, epsilon=epsilon) def lj_9_6(x, sigma, epsilon): """Lennard-Jones 9-6. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` sigma : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` epsilon : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` """ return esp.mm.functional.lj( x=x, sigma=sigma, epsilon=epsilon, order=[9, 6], coefficients=[2, 3] ) def coulomb(x, q, k_e=K_E): """Columb interaction without cutoff. Parameters ---------- x : `torch.Tensor`, shape=`(batch_size, 1)` or `(batch_size, batch_size, 1)` Distance between atoms. q : `torch.Tensor`, `shape=(batch_size, 1) or `(batch_size, batch_size, 1)` Product of charge. Returns ------- torch.Tensor : `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` Coulomb energy. Notes ----- This computes half Coulomb energy to count for the duplication in onefour and nonbonded enumerations. 
""" return 0.5 * k_e * q / x ================================================ FILE: espaloma/mm/tests/system.xml ================================================ ================================================ FILE: espaloma/mm/tests/test_angle.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch def test_angle_random_vectors(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros( 3, ), scale=torch.ones( 3, ), ) left = distribution.sample() right = distribution.sample() cos_ref = (left * right).sum(dim=-1) / ( torch.norm(left) * torch.norm(right) ) cos_hat = torch.cos(esp.mm.geometry._angle(left, right)) npt.assert_almost_equal(cos_ref.numpy(), cos_hat.numpy(), decimal=3) def test_angle_random_points(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros(5, 3), scale=torch.ones(5, 3) ) x0 = distribution.sample() x1 = distribution.sample() x2 = distribution.sample() left = x1 - x0 right = x1 - x2 cos_ref = (left * right).sum(dim=-1) / ( torch.norm(left, dim=-1) * torch.norm(right, dim=-1) ) cos_hat = torch.cos(esp.angle(x0, x1, x2)) npt.assert_almost_equal(cos_ref.numpy(), cos_hat.numpy(), decimal=3) def test_zero(): import espaloma as esp x0 = torch.zeros(5, 3) npt.assert_almost_equal(esp.angle(x0, x0, x0).numpy(), 0.0) ================================================ FILE: espaloma/mm/tests/test_angle_energy.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch import openmm from openmm import unit from espaloma.utils.geometry import _sample_four_particle_torsion_scan omm_angle_unit = unit.radian omm_energy_unit = unit.kilojoule_per_mole from openmm import app import espaloma as esp def test_energy_angle_and_bond(): g = esp.Graph("C") # make simulation from espaloma.data.md import MoleculeVacuumSimulation # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=10, forcefield="gaff-1.81" ) simulation = esp_simulation.simulation_from_graph(g) system = simulation.system esp_simulation.run(g) forces = list(system.getForces()) energies = {} for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) # create new simulation _simulation = openmm.app.Simulation( simulation.topology, system, openmm.VerletIntegrator(0.0), ) _simulation.context.setPositions( g.nodes["n1"].data["xyz"][:, 0, :].detach().numpy() * unit.nanometer ) for idx, force in enumerate(forces): name = force.__class__.__name__ state = _simulation.context.getState( getEnergy=True, getParameters=True, groups=2**idx, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) energies[name] = energy for idx, force in enumerate(forces): name = force.__class__.__name__ if "HarmonicAngleForce" in name: print("openmm thinks there are %s angles" % force.getNumAngles()) for _idx in range(force.getNumAngles()): _, __, ___, eq, k = force.getAngleParameters(_idx) eq = eq.value_in_unit(esp.units.ANGLE_UNIT) k = k.value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT) print(eq, k) # parametrize ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = ff.parametrize(g) # n2 : bond, n3: angle, n1: nonbonded? 
# n1 : sigma (k), epsilon (eq), and charge (not included yet) for term in ["n2", "n3"]: g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] g.nodes[term].data["eq"] = g.nodes[term].data["eq_ref"] print( "espaloma thinks there are %s angles" % g.heterograph.number_of_nodes("n3") ) print(g.nodes["n3"].data["k"]) print(g.nodes["n3"].data["eq"]) # for each atom, store n_snapshots x 3 # g.nodes["n1"].data["xyz"] = torch.tensor( # simulation.context.getState(getPositions=True) # .getPositions(asNumpy=True) # .value_in_unit(esp.units.DISTANCE_UNIT), # dtype=torch.float32, # )[None, :, :].permute(1, 0, 2) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3", "n4"]) n_decimals = 3 # test angles npt.assert_almost_equal( g.nodes["g"].data["u_n3"].detach().numpy(), energies["HarmonicAngleForce"], decimal=n_decimals, ) if __name__ == "__main__": test_energy_angle_and_bond() ================================================ FILE: espaloma/mm/tests/test_bond_energy.py ================================================ import pytest def test_multiple_conformation(): import espaloma as esp g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize g = param(g) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, suffix="_ref") ================================================ FILE: espaloma/mm/tests/test_charge_energy_consistency.py ================================================ import pytest import espaloma as esp import numpy as np import numpy.testing as npt import pytest import torch @pytest.mark.parametrize( "g", esp.data.esol(first=10), # use a subset of ESOL dataset to test # [esp.Graph("c1ccccc1")], ) def test_coulomb_energy_consistency(g): """We use both `esp.mm` and OpenMM to compute the Coulomb energy of some molecules with generated geometries and see if the resulting Columb energy matches. """ from openff.units import unit as openff_unit from espaloma.data.md import MoleculeVacuumSimulation print(g.mol) # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=10, n_steps_per_sample=10, forcefield="gaff-1.81", charge_method="gasteiger", ) simulation = esp_simulation.simulation_from_graph(g) charges = g.mol.partial_charges.m_as(openff_unit.elementary_charge).flatten() system = simulation.system esp_simulation.run(g, in_place=True) # if MD blows up, forget about it if g.nodes["n1"].data["xyz"].abs().max() > 100: pytest.skip( "MD simulation blew up, skipping test. 
" ) g.nodes["n1"].data["q"] = torch.tensor(charges).unsqueeze(-1) esp.mm.nonbonded.multiply_charges(g.heterograph) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph( g.heterograph, terms=["nonbonded", "onefour"] ) print(g.nodes["g"].data["u"].detach()) print(esp.data.md.get_coulomb_force(g)[0]) npt.assert_almost_equal( g.nodes["g"].data["u"].detach().numpy(), esp.data.md.get_coulomb_force(g)[0].numpy(), decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_charge_energy_consistency_hardcode.py ================================================ import pytest import espaloma as esp import numpy as np import numpy.testing as npt import pytest import torch import openmm from openmm import unit @pytest.mark.parametrize( "g", esp.data.esol(first=1), # use a subset of ESOL dataset to test ) def test_coulomb_energy_consistency(g): """We use both `esp.mm` and OpenMM to compute the Coulomb energy of some molecules with generated geometries and see if the resulting Columb energy matches. """ # make simulation from espaloma.data.md import MoleculeVacuumSimulation # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=10, forcefield="gaff-1.81", charge_method="gasteiger", ) simulation = esp_simulation.simulation_from_graph(g) charges = g.mol.partial_charges.flatten() system = simulation.system esp_simulation.run(g, in_place=True) # if MD blows up, forget about it if g.nodes["n1"].data["xyz"].abs().max() > 100: pytest.skip( "MD simulation blew up, skipping test. " ) _simulation = openmm.app.Simulation( simulation.topology, system, openmm.VerletIntegrator(0.0), ) forces = list(system.getForces()) for force in forces: name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) force.updateParametersInContext(_simulation.context) _simulation.context.setPositions( g.nodes["n1"].data["xyz"][:, 0, :].detach().numpy() * unit.bohr ) state = _simulation.context.getState( getEnergy=True, getParameters=True, ) energy_old = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) forces = list(system.getForces()) print(forces) for force in forces: name = force.__class__.__name__ print(name) if name == "NonbondedForce": force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) print(force.getNumExceptions()) for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, 0.0, sigma, epsilon) for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, 0.0, sigma, epsilon ) force.updateParametersInContext(_simulation.context) state = _simulation.context.getState( getEnergy=True, getParameters=True, ) energy_new = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) g.nodes["n1"].data["q"] = torch.tensor(charges).unsqueeze(-1) esp.mm.nonbonded.multiply_charges(g.heterograph) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph( g.heterograph, terms=["nonbonded", "onefour"] ) npt.assert_almost_equal( g.nodes["g"].data["u"].item(), energy_old - energy_new, decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_dihedral.py ================================================ import numpy.testing as npt import torch import espaloma as esp from espaloma.utils.geometry import ( _sample_four_particle_torsion_scan, 
_timemachine_signed_torsion_angle, ) def test_dihedral_vectors(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros(5, 3), scale=torch.ones(5, 3) ) left = distribution.sample() right = distribution.sample() npt.assert_almost_equal( esp.mm.geometry._angle(left, right).numpy(), esp.mm.geometry._dihedral(left, right).numpy(), decimal=3, ) def test_dihedral_points(): n_samples = 1000 # get geometries xyz_np = _sample_four_particle_torsion_scan(n_samples) # compute dihedrals using timemachine (numpy / JAX) ci, cj, ck, cl = ( xyz_np[:, 0, :], xyz_np[:, 1, :], xyz_np[:, 2, :], xyz_np[:, 3, :], ) theta_timemachine = _timemachine_signed_torsion_angle(ci, cj, ck, cl) # compute dihedrals using espaloma (PyTorch) xyz = torch.tensor(xyz_np) x0, x1, x2, x3 = xyz[:, 0, :], xyz[:, 1, :], xyz[:, 2, :], xyz[:, 3, :] theta_espaloma = esp.dihedral(x0, x1, x2, x3).numpy() npt.assert_almost_equal( theta_timemachine, theta_espaloma, decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_distance.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch def test_distance(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros(5, 3), scale=torch.ones(5, 3) ) x0 = distribution.sample() x1 = distribution.sample() npt.assert_almost_equal( esp.distance(x0, x1).numpy(), torch.sqrt((x0 - x1).pow(2).sum(dim=-1)).numpy(), decimal=3, ) npt.assert_almost_equal(esp.distance(x0, x0).numpy(), 0.0) ================================================ FILE: espaloma/mm/tests/test_energy.py ================================================ import pytest import torch import espaloma as esp def test_import(): esp.mm.energy def test_energy(): g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize g = param(g) # parametrize layer = esp.nn.dgl_legacy.gn() net = torch.nn.Sequential( esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]), esp.nn.readout.janossy.JanossyPooling( in_features=32, config=[32, "tanh"], out_features={ 1: ["epsilon", "sigma"], 2: ["k", "eq"], 3: ["k", "eq"], 4: ["k"], }, ), esp.nn.readout.janossy.JanossyPoolingImproper( in_features=32, config=[32, "tanh"], out_features={ "k": 6, }, ), ) g = net(g.heterograph) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g) # esp.mm.energy.energy_in_graph(g) esp.mm.energy.energy_in_graph(g, terms=["n2", "n3", "n4", "n4_improper"]) # def test_energy_consistent(): # g = esp.Graph("c1ccccc1") # # # make simulation # from espaloma.data.md import MoleculeVacuumSimulation # # simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) # g = simulation.run(g, in_place=True) # # param = esp.graphs.legacy_force_field.LegacyForceField( # "smirnoff99Frosst-1.1.0" # ).parametrize # # g = param(g) # # for node in ["n1", "n2", "n3"]: # _dict = {} # for data in g.nodes[node].data.keys(): # if data.endswith("_ref"): # _dict[data.replace("_ref", "")] = g.nodes[node].data[data] # for key, value in _dict.items(): # g.nodes[node].data[key] = value # # # print(g.nodes['n2'].data) # esp.mm.geometry.geometry_in_graph(g.heterograph) # esp.mm.energy.energy_in_graph(g.heterograph) # # esp.mm.energy.energy_in_graph(g.heterograph, suffix="_ref") 
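# -----------------------------------------------------------------------------
# (Added sketch, not part of the original test file.) How the pieces exercised
# above typically compose into a fitting loop: predict parameters with the
# neural network, compute "u" with the energy modules, and regress against the
# reference "u_ref" from a legacy force field using a GraphMetric loss
# (GraphMetric as defined in this repository's metrics module; exact import
# path assumed to be esp.metrics). Hyperparameters are arbitrary placeholders.
# -----------------------------------------------------------------------------
import torch

import espaloma as esp
from espaloma.data.md import MoleculeVacuumSimulation


def fit_to_legacy_energies(n_steps=10):
    # molecule, snapshots, and reference ("_ref") parameters
    g = esp.Graph("c1ccccc1")
    MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10).run(
        g, in_place=True
    )
    esp.graphs.legacy_force_field.LegacyForceField(
        "smirnoff99Frosst-1.1.0"
    ).parametrize(g)

    # representation -> parameter readout -> geometry -> energies
    net = torch.nn.Sequential(
        esp.nn.Sequential(
            esp.nn.dgl_legacy.gn(), [32, "tanh", 32, "tanh", 32, "tanh"]
        ),
        esp.nn.readout.janossy.JanossyPooling(
            in_features=32,
            config=[32, "tanh"],
            out_features={2: ["k", "eq"], 3: ["k", "eq"]},
        ),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(terms=["n2", "n3"]),
        esp.mm.energy.EnergyInGraph(terms=["n2", "n3"], suffix="_ref"),
    )

    # compare predicted "u" with reference "u_ref" on the graph level
    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(), between=["u", "u_ref"], level="g"
    )
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    for _ in range(n_steps):
        optimizer.zero_grad()
        loss = loss_fn(net(g.heterograph))
        loss.backward()
        optimizer.step()
    return g, net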
================================================ FILE: espaloma/mm/tests/test_energy_gaussian.py ================================================ import pytest """ def test_energy(): import espaloma as esp import torch g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize g = param(g) # parametrize layer = esp.nn.dgl_legacy.gn() net = torch.nn.Sequential( esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]), esp.nn.readout.janossy.JanossyPooling( in_features=32, config=[32, "tanh"], out_features={ 1: {'sigma': 1, 'epsilon': 1}, 2: {'coefficients': 200}, 3: {'k':1, 'eq': 1}, }, ), ) g = net(g.heterograph) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g) esp.mm.energy.energy_in_graph(g) esp.mm.energy.energy_in_graph(g, suffix="_ref") """ ================================================ FILE: espaloma/mm/tests/test_energy_ii.py ================================================ import pytest import espaloma as esp import torch import dgl def test_energy(): g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "gaff-1.81" ).parametrize g = param(g) # parametrize # layer layer = esp.nn.layers.dgl_legacy.gn() # representation representation = esp.nn.Sequential( layer, config=[32, "relu", 32, "relu", 32, "relu"] ) # get the last bit of units units = 32 janossy_config = [32, "relu"] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=janossy_config, out_features={ 2: {"log_coefficients": 2}, 3: { "log_coefficients": 2, "coefficients_urey_bradley": 2, "k_bond_bond": 1, "k_bond_angle": 1, "k_bond_angle": 1, }, 4: { "k": 6, "k_angle_angle": 1, "k_angle_angle_torsion": 1, "k_angle_torsion": 1, "k_side_torsion": 1, "k_center_torsion": 1, }, }, ) readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( in_features=units, config=janossy_config ) class ExpCoeff(torch.nn.Module): def forward(self, g): g.nodes["n2"].data["coefficients"] = ( g.nodes["n2"].data["log_coefficients"].exp() ) g.nodes["n3"].data["coefficients"] = ( g.nodes["n3"].data["log_coefficients"].exp() ) return g class CarryII(torch.nn.Module): def forward(self, g): import math g.multi_update_all( { "n2_as_0_in_n3": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_left"), ), "n2_as_1_in_n3": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_right"), ), "n2_as_0_in_n4": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_bond_left"), ), "n2_as_1_in_n4": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_bond_center"), ), "n2_as_2_in_n4": ( dgl.function.copy_u("u", "m_u_2"), dgl.function.sum("m_u_2", "u_bond_right"), ), "n3_as_0_in_n4": ( dgl.function.copy_u("u", "m3_u_0"), dgl.function.sum("m3_u_0", "u_angle_left"), ), "n3_as_1_in_n4": ( dgl.function.copy_u("u", "m3_u_1"), dgl.function.sum("m3_u_1", "u_angle_right"), ), }, cross_reducer="sum", ) return g net = torch.nn.Sequential( representation, readout, readout_improper, ExpCoeff(), esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"]), CarryII(), 
esp.mm.energy.EnergyInGraphII(), ) torch.nn.init.normal_( net[1].f_out_2_to_log_coefficients.bias, mean=-5, ) torch.nn.init.normal_( net[1].f_out_3_to_log_coefficients.bias, mean=-5, ) for name, module in net[1].named_modules(): if "k" in name: torch.nn.init.normal(module.bias, mean=0.0, std=1e-4) torch.nn.init.normal(module.weight, mean=0.0, std=1e-4) g = net(g.heterograph) print(g.nodes["n3"].data) print(g.nodes["n4"].data) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g) esp.mm.energy.energy_in_graph(g) ================================================ FILE: espaloma/mm/tests/test_geometry.py ================================================ import pytest import torch import espaloma as esp from espaloma.graphs.utils.regenerate_impropers import regenerate_impropers def test_import(): esp.mm.geometry # later, if we want to do multiple molecules, group these into a struct smiles = "c1ccccc1" n_samples = 2 ## Different number of expected terms for different improper permutations expected_n_terms = { "none": dict(n2=24, n3=36, n4=48, n4_improper=36), "espaloma": dict(n2=24, n3=36, n4=48, n4_improper=36), "smirnoff": dict(n2=24, n3=36, n4=48, n4_improper=18), } @pytest.fixture def all_g(): from espaloma.data.md import MoleculeVacuumSimulation all_g = {} for improper_def in expected_n_terms.keys(): g = esp.Graph(smiles) if improper_def != "none": regenerate_impropers(g, improper_def) simulation = MoleculeVacuumSimulation( n_samples=n_samples, n_steps_per_sample=1 ) g = simulation.run(g, in_place=True) all_g[improper_def] = g return all_g def test_geometry_can_be_computed_without_exceptions(all_g): for g in all_g.values(): g = esp.mm.geometry.geometry_in_graph(g.heterograph) def test_geometry_n_terms(all_g): for improper_def, g in all_g.items(): g = esp.mm.geometry.geometry_in_graph(g.heterograph) for term, n_terms in expected_n_terms[improper_def].items(): assert g.nodes[term].data["x"].shape == torch.Size( [n_terms, n_samples] ) ================================================ FILE: espaloma/mm/tests/test_linear_combination.py ================================================ import pytest def test_linear_combination(): import torch import espaloma as esp assert ( esp.mm.functional.linear_mixture( 0.0, torch.tensor([[0.0, 0.0]]), ) == 0.0 ) assert ( esp.mm.functional.linear_mixture( 1.0, torch.tensor([[1.0, 1.0]]), [0.0, 2.0], ) == 1.0 ) def test_consistency(): import torch import espaloma as esp g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) g.nodes["n2"].data["coefficients"] = torch.randn( g.heterograph.number_of_nodes("n2"), 2 ).exp() g.nodes["n3"].data["coefficients"] = torch.randn( g.heterograph.number_of_nodes("n3"), 2 ).exp() esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3"]) u0_2 = g.nodes["n2"].data["u"] - g.nodes["n2"].data["u"].mean( dim=1, keepdims=True ) u0_3 = g.nodes["n3"].data["u"] - g.nodes["n3"].data["u"].mean( dim=1, keepdims=True ) u0 = g.nodes["g"].data["u"] - g.nodes["g"].data["u"].mean( dim=1, keepdims=True ) ( g.nodes["n2"].data["k"], g.nodes["n2"].data["eq"], ) = esp.mm.functional.linear_mixture_to_original( g.nodes["n2"].data["coefficients"][:, 0][:, None], g.nodes["n2"].data["coefficients"][:, 1][:, None], 1.5, 6.0, ) import math ( g.nodes["n3"].data["k"], g.nodes["n3"].data["eq"], ) = 
esp.mm.functional.linear_mixture_to_original( g.nodes["n3"].data["coefficients"][:, 0][:, None], g.nodes["n3"].data["coefficients"][:, 1][:, None], 0.0, math.pi, ) g.nodes["n2"].data.pop("coefficients") g.nodes["n3"].data.pop("coefficients") esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3"]) u1_2 = g.nodes["n2"].data["u"] - g.nodes["n2"].data["u"].mean( dim=1, keepdims=True ) u1_3 = g.nodes["n3"].data["u"] - g.nodes["n3"].data["u"].mean( dim=1, keepdims=True ) u1 = g.nodes["g"].data["u"] - g.nodes["g"].data["u"].mean( dim=1, keepdims=True ) import numpy.testing as npt npt.assert_almost_equal( u0_2.detach().numpy(), u1_2.detach().numpy(), decimal=3, ) npt.assert_almost_equal( u0_3.detach().numpy(), u1_3.detach().numpy(), decimal=3, ) npt.assert_almost_equal( u0.detach().numpy(), u1.detach().numpy(), decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_openmm_consistency.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch import openmm from openmm import unit from espaloma.utils.geometry import _sample_four_particle_torsion_scan omm_angle_unit = unit.radian omm_energy_unit = unit.kilojoule_per_mole from openmm import app import espaloma as esp decimal_threshold = 2 def _create_torsion_sim( periodicity: int = 2, phase=0 * omm_angle_unit, k=10.0 * omm_energy_unit ) -> app.Simulation: """Create a 4-particle OpenMM Simulation containing only a PeriodicTorsionForce""" system = openmm.System() # add 4 particles of unit mass for _ in range(4): system.addParticle(1) # add torsion force to system force = openmm.PeriodicTorsionForce() force.addTorsion(0, 1, 2, 3, periodicity, phase, k) system.addForce(force) # create openmm Simulation, which requires a Topology and Integrator topology = app.Topology() chain = topology.addChain() residue = topology.addResidue("torsion", chain) for name in ["a", "b", "c", "d"]: topology.addAtom(name, "C", residue) integrator = openmm.VerletIntegrator(1.0) sim = app.Simulation(topology, system, integrator) return sim # TODO: mark this properly: want to test periodicities 1..6, +ve, -ve k # @pytest.mark.parametrize(periodicity=[1,2,3,4,5,6], k=[-10 * omm_energy_unit, +10 * omm_energy_unit]) def test_periodic_torsion( periodicity=4, k=10 * omm_energy_unit, n_samples=100 ): """Using simulated torsion scan, test if espaloma torsion energies and OpenMM torsion energies agree. 
""" phase = 0 * omm_angle_unit # all zero phases # create torsion simulation sim = _create_torsion_sim(periodicity=periodicity, phase=phase, k=k) # grab snapshots from torsion scan xyz_np = _sample_four_particle_torsion_scan(n_samples) # compute energies using OpenMM openmm_energies = np.zeros(n_samples) for i, pos in enumerate(xyz_np): sim.context.setPositions(pos) openmm_energies[i] = ( sim.context.getState(getEnergy=True).getPotentialEnergy() / omm_energy_unit ) # compute energies using espaloma xyz = torch.tensor(xyz_np) x0, x1, x2, x3 = xyz[:, 0, :], xyz[:, 1, :], xyz[:, 2, :], xyz[:, 3, :] theta = esp.mm.geometry.dihedral(x0, x1, x2, x3).reshape((n_samples, 1)) ks = torch.zeros(n_samples, 6) ks[:, periodicity - 1] = k.value_in_unit(esp.units.ENERGY_UNIT) espaloma_energies = ( esp.mm.functional.periodic(theta, ks).numpy().flatten() * esp.units.ENERGY_UNIT ) espaloma_energies_in_omm_units = espaloma_energies.value_in_unit( omm_energy_unit ) np.testing.assert_almost_equal( actual=espaloma_energies_in_omm_units, desired=openmm_energies, decimal=decimal_threshold, ) # TODO: parameterize on the individual energy terms also @pytest.mark.parametrize( "g", esp.data.esol(first=10), ) def test_energy_angle_and_bond(g): # make simulation from espaloma.data.md import MoleculeVacuumSimulation # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=1000, forcefield="gaff-1.81", charge_method="gasteiger", ) simulation = esp_simulation.simulation_from_graph(g) system = simulation.system esp_simulation.run(g, in_place=True) # if MD blows up, forget about it if g.nodes["n1"].data["xyz"].abs().max() > 100: pytest.skip("MD simulation blew up, skipping test.") forces = list(system.getForces()) energies = {} for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) # epsilons = {} # sigmas = {} # for _idx in range(force.getNumParticles()): # q, sigma, epsilon = force.getParticleParameters(_idx) # # record parameters # epsilons[_idx] = epsilon # sigmas[_idx] = sigma # force.setParticleParameters(_idx, 0., sigma, epsilon) # def sigma_combining_rule(sig1, sig2): # return (sig1 + sig2) / 2 # def eps_combining_rule(eps1, eps2): # return np.sqrt(np.abs(eps1 * eps2)) # for _idx in range(force.getNumExceptions()): # idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( # _idx) # force.setExceptionParameters( # _idx, # idx0, # idx1, # 0.0, # sigma_combining_rule(sigmas[idx0], sigmas[idx1]), # eps_combining_rule(epsilons[idx0], epsilons[idx1]) # ) # force.updateParametersInContext(_simulation.context) # create new simulation _simulation = openmm.app.Simulation( simulation.topology, system, openmm.VerletIntegrator(0.0), ) _simulation.context.setPositions( g.nodes["n1"].data["xyz"][:, 0, :].detach().numpy() * unit.bohr ) for idx, force in enumerate(forces): name = force.__class__.__name__ state = _simulation.context.getState( getEnergy=True, getParameters=True, groups=2**idx, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) energies[name] = energy # parametrize ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = ff.parametrize(g) # n2 : bond, n3: angle, n1: nonbonded? 
# n1 : sigma (k), epsilon (eq), and charge (not included yet) for term in ["n2", "n3"]: g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] g.nodes[term].data["eq"] = g.nodes[term].data["eq_ref"] """ for term in ["n1"]: g.nodes[term].data["sigma"] = g.nodes[term].data["sigma_ref"] g.nodes[term].data["epsilon"] = g.nodes[term].data["epsilon_ref"] # g.nodes[term].data['q'] = g.nodes[term].data['q_ref'] """ for term in ["n4"]: g.nodes[term].data["phases"] = g.nodes[term].data["phases_ref"] g.nodes[term].data["periodicity"] = g.nodes[term].data[ "periodicity_ref" ] g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] # for each atom, store n_snapshots x 3 # g.nodes["n1"].data["xyz"] = torch.tensor( # simulation.context.getState(getPositions=True) # .getPositions(asNumpy=True) # .value_in_unit(esp.units.DISTANCE_UNIT), # dtype=torch.float32, # )[None, :, :].permute(1, 0, 2) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3", "n4"]) # writes into nodes # .data['u_nonbonded'], .data['u_onefour'], .data['u2'], .data['u3'], # TODO: consider more carefully how many decimals of precision are needed n_decimals = 3 # test bonds npt.assert_almost_equal( g.nodes["g"].data["u_n2"].detach().numpy(), energies["HarmonicBondForce"], decimal=n_decimals, ) # test angles npt.assert_almost_equal( g.nodes["g"].data["u_n3"].detach().numpy(), energies["HarmonicAngleForce"], decimal=n_decimals, ) # propers = g.nodes["g"].data["u_n4"].detach().numpy() # impropers = g.nodes["g"].data["u_n4_improper"].detach().numpy() # all_torsions = propers + impropers # npt.assert_almost_equal( # all_torsions, # energies["PeriodicTorsionForce"], # decimal=n_decimals, # ) # print(all_torsions) # print(energies["PeriodicTorsionForce"]) # TODO: # This is not working now, matching OpenMM nonbonded. 
# test nonbonded # TODO: must set all charges to zero in _simulation for this to pass currently, since g doesn't have any charges # npt.assert_almost_equal( # g.nodes['g'].data['u_nonbonded'].numpy()\ # + g.nodes['g'].data['u_onefour'].numpy(), # energies['NonbondedForce'], # decimal=3, # ) ================================================ FILE: espaloma/mm/tests/test_recoverability.py ================================================ # Check whether we can recover a molecular mechanics model containing just one kind of term # Initially, interested in recovering a molecular mechanics model containing only improper torsion terms import numpy as np from openff.toolkit.topology import Molecule, Topology from openff.toolkit.typing.engines.smirnoff import ForceField import openmm as mm import pytest import espaloma as esp import torch def _create_impropers_only_system( smiles: str = "CC1=C(C(=O)C2=C(C1=O)N3CC4C(C3(C2COC(=O)N)OC)N4)N", ) -> mm.System: """Create a simulation that contains only improper torsion terms, by parameterizing with openff-1.2.0 and deleting all terms but impropers """ molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) g = esp.Graph(molecule) topology = Topology.from_molecules(molecule) forcefield = ForceField("openff-1.2.0.offxml") openmm_system = forcefield.create_openmm_system(topology) # delete all forces except PeriodicTorsionForce is_torsion = ( lambda force: "PeriodicTorsionForce" in force.__class__.__name__ ) for i in range(openmm_system.getNumForces())[::-1]: if not is_torsion(openmm_system.getForce(i)): openmm_system.removeForce(i) assert openmm_system.getNumForces() == 1 torsion_force = openmm_system.getForce(0) assert is_torsion(torsion_force) # set k = 0 for any torsion that's not an improper indices = set( map( tuple, esp.graphs.utils.offmol_indices.improper_torsion_indices( molecule ), ) ) num_impropers_retained = 0 for i in range(torsion_force.getNumTorsions()): ( p1, p2, p3, p4, periodicity, phase, k, ) = torsion_force.getTorsionParameters(i) if (p1, p2, p3, p4) in indices: num_impropers_retained += 1 else: torsion_force.setTorsionParameters( i, p1, p2, p3, p4, periodicity, phase, 0.0 ) assert ( num_impropers_retained > 0 ) # otherwise this molecule is not a useful test case! 
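# at this point every proper torsion has k = 0, so the single remaining PeriodicTorsionForce contributes improper-torsion energy only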
return openmm_system, topology, g @pytest.mark.skip(reason="too slow") def test_improper_recover(): import openmm from openmm import unit from openmm.app import Simulation from openmm.unit import Quantity TEMPERATURE = 500 * unit.kelvin STEP_SIZE = 1 * unit.femtosecond COLLISION_RATE = 1 / unit.picosecond system, topology, g = _create_impropers_only_system() # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # initialize simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) import openff.toolkit # get conformer g.mol.generate_conformers( toolkit_registry=openff.toolkit.utils.RDKitToolkitWrapper(), ) # put conformer in simulation simulation.context.setPositions(g.mol.conformers[0]) # minimize energy simulation.minimizeEnergy() # set velocities simulation.context.setVelocitiesToTemperature(TEMPERATURE) samples = [] us = [] # loop through number of samples for _ in range(10): # run MD for `self.n_steps_per_sample` steps simulation.step(10) # append samples to `samples` samples.append( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(esp.units.DISTANCE_UNIT) ) us.append( simulation.context.getState(getEnergy=True) .getPotentialEnergy() .value_in_unit(esp.units.ENERGY_UNIT) ) # put samples into an array samples = np.array(samples) us = np.array(us) # put samples into tensor samples = torch.tensor(samples, dtype=torch.float32) us = torch.tensor(us, dtype=torch.float32)[None, :, None] g.heterograph.nodes["n1"].data["xyz"] = samples.permute(1, 0, 2) # require gradient for force matching g.heterograph.nodes["n1"].data["xyz"].requires_grad = True g.heterograph.nodes["g"].data["u_ref"] = us # parametrize layer = esp.nn.dgl_legacy.gn() net = torch.nn.Sequential( esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]), esp.nn.readout.janossy.JanossyPoolingImproper( in_features=32, config=[32, "tanh"], out_features={ "k": 6, }, ), esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(terms=["n4_improper"]), ) optimizer = torch.optim.Adam(net.parameters(), 1e-3) for _ in range(1500): optimizer.zero_grad() net(g.heterograph) u_ref = g.nodes["g"].data["u"] u = g.nodes["g"].data["u_ref"] loss = torch.nn.MSELoss()(u_ref, u) loss.backward() print(loss) optimizer.step() assert loss.detach().numpy().item() < 0.1 # caffeine_smiles = 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C' # # # def _create_random_impropers_only_system(smiles: str = caffeine_smiles, k_stddev: float = 10.0) -> mm.System: # """Create an OpenMM system that contains only a large number of improper torsion terms, # assigning random coefficients ~ N(0, k_stddev) kJ/mol""" # # molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) # # topology = Topology.from_molecules(molecule) # forcefield = ForceField('openff-1.2.0.offxml') # openmm_system = forcefield.create_openmm_system(topology) # # # delete all forces # while openmm_system.getNumForces() > 0: # openmm_system.removeForce(0) # # # add a torsion force # torsion_force = mm.PeriodicTorsionForce() # # # for each improper torsion abcd, sample a periodicity, phase, and k, then add 3 terms to torsion_force # # with different indices abcd, acdb, adbc but identical periodicity, phase, and k # indices = esp.graphs.utils.offmol_indices.improper_torsion_indices(molecule) # improper_perms = [(0, 1, 2, 3), (0, 2, 3, 1), (0, 3, 1, 2)] # # for inds in indices: # periodicity = np.random.randint(1, 7) # phase = 0 # k = 
np.random.randn() * k_stddev # for perm in improper_perms: # p1, p2, p3, p4 = [int(inds[p]) for p in perm] # careful to pass python ints rather than np ints to openmm # torsion_force.addTorsion(p1, p2, p3, p4, periodicity, phase, k) # # openmm_system.addForce(torsion_force) # # return openmm_system # TODO: integration test where we recover this molecular mechanics system from energies/forces ================================================ FILE: espaloma/mm/torsion.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def periodic_torsion( x, k, periodicity=list(range(1, 7)), phases=[0.0 for _ in range(6)] ): """Periodic torsion potential Parameters ---------- x : `torch.Tensor`, `shape = (batch_size, 1)` Dihedral value. k : `torch.Tensor`, `shape = (batch_size, n_phases)` Force constants. periodicity : `torch.Tensor`, `shape = (batch_size, n_phases)` Periodicities phases : `torch.Tensor`, `shape = (batch_size, n_phases)` Phase offsets Returns ------- u : `torch.Tensor`, `shape = (batch_size, 1)` Energy. """ # NOTE: # 0.5 because all torsions are calculated twice out = 0.5 * esp.mm.functional.periodic( x=x, k=k, periodicity=periodicity, phases=phases, ) # assert(out.shape == (len(x), 1)) return out def angle_angle( u_angle_left, u_angle_right, k_angle_angle, ): u_angle_left = u_angle_left - u_angle_left.min(dim=-1, keepdims=True)[0] u_angle_right = ( u_angle_right - u_angle_right.min(dim=-1, keepdims=True)[0] ) return k_angle_angle * (u_angle_left**0.5) * (u_angle_right**0.5) def angle_torsion( u_angle_left, u_angle_right, u_torsion, k_angle_torsion, ): u_angle_left = u_angle_left - u_angle_left.min(dim=-1, keepdims=True)[0] u_angle_right = ( u_angle_right - u_angle_right.min(dim=-1, keepdims=True)[0] ) return ( k_angle_torsion * (u_angle_left**0.5) * u_torsion + k_angle_torsion * (u_angle_right**0.5) * u_torsion ) def angle_angle_torsion( u_angle_left, u_angle_right, u_torsion, k_angle_angle_torsion, ): u_angle_left = u_angle_left - u_angle_left.min(dim=-1, keepdims=True)[0] u_angle_right = ( u_angle_right - u_angle_right.min(dim=-1, keepdims=True)[0] ) return ( k_angle_angle_torsion * (u_angle_left**0.5) * (u_angle_right**0.5) * u_torsion ) def bond_torsion( u_bond_left, u_bond_right, u_bond_center, u_torsion, k_side_torsion, k_center_torsion, ): u_bond_left = u_bond_left - u_bond_left.min(dim=-1, keepdims=True)[0] u_bond_right = u_bond_right - u_bond_right.min(dim=-1, keepdims=True)[0] u_bond_center = ( u_bond_center - u_bond_center.min(dim=-1, keepdims=True)[0] ) return ( k_side_torsion * u_torsion * (u_bond_left**0.5) + k_side_torsion * u_torsion * (u_bond_right**0.5) + k_center_torsion * u_torsion * (u_bond_center**0.5) ) ================================================ FILE: espaloma/nn/__init__.py ================================================ from . 
import baselines, layers, readout, sequential from .layers import dgl_legacy from .sequential import Sequential ================================================ FILE: espaloma/nn/baselines.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # MODULE CLASSES # ============================================================================= class FreeParameterBaseline(torch.nn.Module): """Parametrize a graph by populating the parameters with free `torch.nn.Parameter`. """ def __init__(self, g_ref): super(FreeParameterBaseline, self).__init__() self.g_ref = g_ref # whenever there is a reference parameter, # assign a `torch.nn.Parameter` for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref") and "u" not in param: setattr( self, "%s_%s" % (term, param.replace("_ref", "")), torch.nn.Parameter( torch.zeros_like( param_value.clone().detach(), ) ), ) def forward(self, g): update_dicts = {node: {} for node in self.g_ref.ntypes} for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref"): if hasattr( self, "%s_%s" % (term, param.replace("_ref", "")) ): update_dicts[term][ param.replace("_ref", "") ] = getattr( self, "%s_%s" % (term, param.replace("_ref", "")), ) for node, update_dict in update_dicts.items(): for param, param_value in update_dict.items(): g.nodes[node].data[param] = param_value return g class FreeParameterBaselineInitMean(torch.nn.Module): """Parametrize a graph by populating the parameters with free `torch.nn.Parameter`. """ def __init__(self, g_ref): super(FreeParameterBaselineInitMean, self).__init__() self.g_ref = g_ref # whenever there is a reference parameter, # assign a `torch.nn.Parameter` for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref") and "u" not in param: setattr( self, "%s_%s" % (term, param.replace("_ref", "")), torch.nn.Parameter( torch.ones_like( param_value.clone().detach(), ) * param_value.clone().detach().mean() ), ) def forward(self, g): update_dicts = {node: {} for node in self.g_ref.ntypes} for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref"): if hasattr( self, "%s_%s" % (term, param.replace("_ref", "")) ): update_dicts[term][ param.replace("_ref", "") ] = getattr( self, "%s_%s" % (term, param.replace("_ref", "")), ) for node, update_dict in update_dicts.items(): for param, param_value in update_dict.items(): g.nodes[node].data[param] = param_value return g ================================================ FILE: espaloma/nn/layers/__init__.py ================================================ import espaloma.nn.layers.dgl_legacy ================================================ FILE: espaloma/nn/layers/dgl_legacy.py ================================================ """ Legacy models from DGL. 
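Wraps graph convolution layers from ``dgl.nn.pytorch.conv`` behind a uniform ``(in_features, out_features)`` constructor so they can be stacked by ``espaloma.nn.Sequential``; per-model default keyword arguments are supplied by ``DEFAULT_MODEL_KWARGS``.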
""" # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # CONSTANT # ============================================================================= DEFAULT_MODEL_KWARGS = { "SAGEConv": {"aggregator_type": "mean"}, "GATConv": {"num_heads": 4}, "TAGConv": {"k": 2}, } # ============================================================================= # MODULE CLASSES # ============================================================================= class GN(torch.nn.Module): def __init__( self, in_features, out_features, model_name="GraphConv", kwargs={}, ): super(GN, self).__init__() from dgl.nn import pytorch as dgl_pytorch if kwargs == {}: if model_name in DEFAULT_MODEL_KWARGS: kwargs = DEFAULT_MODEL_KWARGS[model_name] self.gn = getattr(dgl_pytorch.conv, model_name)( in_features, out_features, **kwargs ) # register these properties here for downstream handling self.in_features = in_features self.out_features = out_features def forward(self, g, x): return self.gn(g, x) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def gn(model_name="GraphConv", kwargs={}): from dgl.nn import pytorch as dgl_pytorch if model_name == "GINConv": return lambda in_features, out_features: dgl_pytorch.conv.GINConv( apply_func=torch.nn.Linear(in_features, out_features), aggregator_type="sum", ) else: return lambda in_features, out_features: GN( in_features=in_features, out_features=out_features, model_name=model_name, kwargs=kwargs, ) ================================================ FILE: espaloma/nn/readout/__init__.py ================================================ from . import janossy, graph_level_readout, node_typing, charge_equilibrium ================================================ FILE: espaloma/nn/readout/base_readout.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import torch # ============================================================================= # BASE CLASSES # ============================================================================= class BaseReadout(abc.ABC, torch.nn.Module): """Base class for readout function.""" def __init__(self): super(BaseReadout, self).__init__() @abc.abstractmethod def forward(self, g, x=None, *args, **kwargs): raise NotImplementedError def _forward(self, g, x, *args, **kwargs): raise NotImplementedError ================================================ FILE: espaloma/nn/readout/charge_equilibrium.py ================================================ """ Charge equilibrium.ß """ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def get_charges(node): """ Solve the function to get the absolute charges of atoms in a molecule from parameters. Parameters ---------- e : tf.Tensor, dtype = tf.float32, electronegativity. 
s : tf.Tensor, dtype = tf.float32, hardness. Q : tf.Tensor, dtype = tf.float32, shape=(), total charge of a molecule. We use Lagrange multipliers to analytically give the solution. $$ U({\bf q}) &= \sum_{i=1}^N \left[ e_i q_i + \frac{1}{2} s_i q_i^2\right] - \lambda \, \left( \sum_{j=1}^N q_j - Q \right) \\ &= \sum_{i=1}^N \left[ (e_i - \lambda) q_i + \frac{1}{2} s_i q_i^2 \right ] + Q $$ This gives us: $$ q_i^* &= - e_i s_i^{-1} + \lambda s_i^{-1} \\ &= - e_i s_i^{-1} + s_i^{-1} \frac{ Q + \sum\limits_{i=1}^N e_i \, s_i^{-1} }{\sum\limits_{j=1}^N s_j^{-1}} $$ """ e = node.data["e"] s = node.data["s"] sum_e_s_inv = node.data["sum_e_s_inv"] sum_s_inv = node.data["sum_s_inv"] sum_q = node.data["sum_q"] return { "q": -e * s**-1 + (s**-1) * torch.div(sum_q + sum_e_s_inv, sum_s_inv) } # ============================================================================= # MODULE CLASS # ============================================================================= class ChargeEquilibrium(torch.nn.Module): """Charge equilibrium within batches of molecules.""" def __init__(self): super(ChargeEquilibrium, self).__init__() def forward(self, g, total_charge=0.0): """apply charge equilibrium to all molecules in batch""" # calculate $s ^ {-1}$ and $ es ^ {-1}$ import dgl g.apply_nodes( lambda node: {"s_inv": node.data["s"] ** -1}, ntype="n1" ) g.apply_nodes( lambda node: {"e_s_inv": node.data["e"] * node.data["s"] ** -1}, ntype="n1", ) if "sum_q" not in g.nodes["g"].data: if "q_ref" in g.nodes["n1"].data: # get total charge g.update_all( dgl.function.copy_u(u="q_ref", out="m_q"), dgl.function.sum(msg="m_q", out="sum_q"), etype="n1_in_g", ) else: g.nodes["g"].data["sum_q"] = ( torch.ones( g.batch_size, 1, device=g.nodes["n1"].data["s"].device, ) * total_charge ) g.update_all( dgl.function.copy_u(u="sum_q", out="m_sum_q"), dgl.function.sum(msg="m_sum_q", out="sum_q"), etype="g_has_n1", ) # get the sum of $s^{-1}$ and $m_s^{-1}$ g.update_all( dgl.function.copy_u(u="s_inv", out="m_s_inv"), dgl.function.sum(msg="m_s_inv", out="sum_s_inv"), etype="n1_in_g", ) g.update_all( dgl.function.copy_u(u="e_s_inv", out="m_e_s_inv"), dgl.function.sum(msg="m_e_s_inv", out="sum_e_s_inv"), etype="n1_in_g", ) g.update_all( dgl.function.copy_u(u="sum_s_inv", out="m_sum_s_inv"), dgl.function.sum(msg="m_sum_s_inv", out="sum_s_inv"), etype="g_has_n1", ) g.update_all( dgl.function.copy_u(u="sum_e_s_inv", out="m_sum_e_s_inv"), dgl.function.sum(msg="m_sum_e_s_inv", out="sum_e_s_inv"), etype="g_has_n1", ) g.apply_nodes(get_charges, ntype="n1") return g ================================================ FILE: espaloma/nn/readout/graph_level_readout.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class GraphLevelReadout(torch.nn.Module): """Readout from graph level.""" def __init__( self, in_features, config_local, config_global, out_name, pool=None, ): super(GraphLevelReadout, self).__init__() import dgl if pool is None: pool = dgl.function.sum self.in_features = in_features self.config_local = config_local self.config_global = config_global self.d_local = esp.nn.sequential._Sequential( in_features=in_features, config=config_local, layer=torch.nn.Linear, ) 
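# the last integer entry in config_local is the output width of the local network; use it as the input width of the global network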
mid_features = [x for x in config_local if isinstance(x, int)][-1] self.d_global = esp.nn.sequential._Sequential( in_features=mid_features, config=config_global, layer=torch.nn.Linear, ) self.pool = pool self.out_name = out_name def forward(self, g): import dgl g.apply_nodes( lambda node: {"h_global": self.d_local(None, node.data["h"])}, ntype="n1", ) g.update_all( dgl.function.copy_u("h_global", "m"), self.pool("m", "h_global"), etype="n1_in_g", ) g.apply_nodes( lambda node: { self.out_name: self.d_global(None, node.data["h_global"]) }, ntype="g", ) return g ================================================ FILE: espaloma/nn/readout/janossy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class JanossyPooling(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for higher-order nodes. """ def __init__( self, config, in_features, out_features={ 1: ["sigma", "epsilon", "q"], 2: ["k", "eq"], 3: ["k", "eq"], 4: ["k", "eq"], }, out_features_dimensions=-1, pool=torch.add, ): super(JanossyPooling, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero for level in out_features.keys(): if isinstance(out_features[level], list): out_features[level] = dict( zip(out_features[level], [1 for _ in out_features[level]]) ) # bookkeeping self.out_features = out_features self.levels = [key for key in out_features.keys() if key != 1] self.pool = pool # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=in_features * level, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features[level].items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) if 1 not in self.out_features: return # atom level self.sequential_1 = esp.nn.sequential._Sequential( in_features=in_features, config=config, layer=torch.nn.Linear ) for feature, dimension in self.out_features[1].items(): setattr( self, "f_out_1_to_%s" % feature, torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
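Atom representations ``h`` are first copied onto every position of each higher-order node (``n2``, ``n3``, ``n4``); each term is then scored by passing the concatenated representations and their reverse through the per-level network and combining the two results with ``self.pool``, making the readout invariant to reversing a term's atom order.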
""" import dgl # copy g.multi_update_all( { "n1_as_%s_in_n%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(big_idx) }, cross_reducer="sum", ) # pool for big_idx in self.levels: if g.number_of_nodes("n%s" % big_idx) == 0: continue g.apply_nodes( func=lambda nodes: { feature: getattr( self, "f_out_%s_to_%s" % (big_idx, feature) )( self.pool( getattr(self, "sequential_%s" % big_idx)( None, torch.cat( [ nodes.data["h%s" % relationship_idx] for relationship_idx in range(big_idx) ], dim=1, ), ), getattr(self, "sequential_%s" % big_idx)( None, torch.cat( [ nodes.data["h%s" % relationship_idx] for relationship_idx in range( big_idx - 1, -1, -1 ) ], dim=1, ), ), ), ) for feature in self.out_features[big_idx].keys() }, ntype="n%s" % big_idx, ) if 1 not in self.out_features: return g # atom level g.apply_nodes( func=lambda nodes: { feature: getattr(self, "f_out_1_to_%s" % feature)( self.sequential_1(g=None, x=nodes.data["h"]) ) for feature in self.out_features[1].keys() }, ntype="n1", ) return g class JanossyPoolingImproper(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for improper torsions. """ def __init__( self, config, in_features, out_features={ "k": 2, }, out_features_dimensions=-1, ): super(JanossyPoolingImproper, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero # bookkeeping self.out_features = out_features self.levels = ["n4_improper"] # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=4 * in_features, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features.items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
""" import dgl # copy g.multi_update_all( { "n1_as_%s_in_%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(4) }, cross_reducer="sum", ) if g.number_of_nodes("n4_improper") == 0: return g # pool # sum over three cyclic permutations of "h0", "h2", "h3", assuming "h1" is the central atom in the improper # following the smirnoff trefoil convention [(0, 1, 2, 3), (2, 1, 3, 0), (3, 1, 0, 2)] # https://github.com/openff.toolkit/openff.toolkit/blob/166c9864de3455244bd80b2c24656bd7dda3ae2d/openff.toolkit/typing/engines/smirnoff/parameters.py#L3326-L3360 ## Set different permutations based on which definition of impropers ## are being used permuts = [(0, 1, 2, 3), (2, 1, 3, 0), (3, 1, 0, 2)] stack_permuts = lambda nodes, p: torch.cat( [nodes.data[f"h{i}"] for i in p], dim=1 ) for big_idx in self.levels: inner_net = getattr(self, f"sequential_{big_idx}") g.apply_nodes( func=lambda nodes: { feature: getattr(self, f"f_out_{big_idx}_to_{feature}")( torch.sum( torch.stack( [ inner_net( g=None, x=stack_permuts(nodes, p) ) for p in permuts ], dim=0, ), dim=0, ) ) for feature in self.out_features.keys() }, ntype=big_idx, ) return g class JanossyPoolingWithSmirnoffImproper(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for improper torsions. """ def __init__( self, config, in_features, out_features={ "k": 2, }, out_features_dimensions=-1, ): super(JanossyPoolingWithSmirnoffImproper, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero # bookkeeping self.out_features = out_features self.levels = ["n4_improper"] # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=4 * in_features, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features.items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
""" import dgl # copy g.multi_update_all( { "n1_as_%s_in_%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(4) }, cross_reducer="sum", ) if g.number_of_nodes("n4_improper") == 0: return g # pool # sum over three cyclic permutations of "h0", "h2", "h3", assuming "h1" is the central atom in the improper # following the smirnoff trefoil convention [(0, 1, 2, 3), (2, 1, 3, 0), (3, 1, 0, 2)] # https://github.com/openff.toolkit/openff.toolkit/blob/166c9864de3455244bd80b2c24656bd7dda3ae2d/openff.toolkit/typing/engines/smirnoff/parameters.py#L3326-L3360 ## Set different permutations based on which definition of impropers ## are being used permuts = [(0, 1, 2, 3), (0, 2, 3, 1), (0, 3, 1, 2)] stack_permuts = lambda nodes, p: torch.cat( [nodes.data[f"h{i}"] for i in p], dim=1 ) for big_idx in self.levels: inner_net = getattr(self, f"sequential_{big_idx}") g.apply_nodes( func=lambda nodes: { feature: getattr(self, f"f_out_{big_idx}_to_{feature}")( torch.sum( torch.stack( [ inner_net( g=None, x=stack_permuts(nodes, p) ) for p in permuts ], dim=0, ), dim=0, ) ) for feature in self.out_features.keys() }, ntype=big_idx, ) return g class JanossyPoolingNonbonded(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for nonbonded interactions. """ def __init__( self, config, in_features, out_features={"sigma": 1, "epsilon": 1}, out_features_dimensions=-1, ): super(JanossyPoolingNonbonded, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero # bookkeeping self.out_features = out_features self.levels = ["onefour", "nonbonded"] # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=2 * in_features, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features.items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
""" # copy g.multi_update_all( { "n1_as_%s_in_%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(2) }, cross_reducer="sum", ) for big_idx in self.levels: g.apply_nodes( func=lambda nodes: { feature: getattr( self, "f_out_%s_to_%s" % (big_idx, feature) )( torch.sum( torch.stack( [ getattr(self, "sequential_%s" % big_idx)( g=None, x=torch.cat( [ nodes.data["h0"], nodes.data["h1"], ], dim=1, ), ), getattr(self, "sequential_%s" % big_idx)( g=None, x=torch.cat( [ nodes.data["h1"], nodes.data["h0"], ], dim=1, ), ), ], dim=0, ), dim=0, ) ) for feature in self.out_features.keys() }, ntype=big_idx, ) return g class ExpCoefficients(torch.nn.Module): def forward(self, g): import math g.nodes["n2"].data["coefficients"] = ( g.nodes["n2"].data["log_coefficients"].exp() ) g.nodes["n3"].data["coefficients"] = ( g.nodes["n3"].data["log_coefficients"].exp() ) return g class LinearMixtureToOriginal(torch.nn.Module): def forward(self, g): import math ( g.nodes["n2"].data["k"], g.nodes["n2"].data["eq"], ) = esp.mm.functional.linear_mixture_to_original( g.nodes["n2"].data["coefficients"][:, 0][:, None], g.nodes["n2"].data["coefficients"][:, 1][:, None], 1.5, 6.0, ) ( g.nodes["n3"].data["k"], g.nodes["n3"].data["eq"], ) = esp.mm.functional.linear_mixture_to_original( g.nodes["n3"].data["coefficients"][:, 0][:, None], g.nodes["n3"].data["coefficients"][:, 1][:, None], 0.0, math.pi, ) g.nodes["n3"].data.pop("coefficients") g.nodes["n2"].data.pop("coefficients") return g ================================================ FILE: espaloma/nn/readout/node_typing.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch from espaloma.nn.readout.base_readout import BaseReadout # ============================================================================= # MODULE CLASSES # ============================================================================= class NodeTyping(BaseReadout): """Simple typing on homograph.""" def __init__(self, in_features, n_classes=100): super(NodeTyping, self).__init__() self.c = torch.nn.Linear(in_features, n_classes) def forward(self, g): g.apply_nodes( ntype="n1", func=lambda node: {"nn_typing": self.c(node.data["h"])}, ) return g ================================================ FILE: espaloma/nn/sequential.py ================================================ """ Chain mutiple layers of GN together. 
""" import torch class _Sequential(torch.nn.Module): """Sequentially staggered neural networks.""" def __init__( self, layer, config, in_features, model_kwargs={}, ): super(_Sequential, self).__init__() self.exes = [] # init dim dim = in_features # parse the config for idx, exe in enumerate(config): try: exe = float(exe) if exe >= 1: exe = int(exe) except BaseException: pass # int -> feedfoward if isinstance(exe, int): setattr(self, "d" + str(idx), layer(dim, exe, **model_kwargs)) dim = exe self.exes.append("d" + str(idx)) # str -> activation elif isinstance(exe, str): if exe == "bn": setattr(self, "a" + str(idx), torch.nn.BatchNorm1d(dim)) else: activation = getattr(torch.nn.functional, exe) setattr(self, "a" + str(idx), activation) self.exes.append("a" + str(idx)) # float -> dropout elif isinstance(exe, float): dropout = torch.nn.Dropout(exe) setattr(self, "o" + str(idx), dropout) self.exes.append("o" + str(idx)) def forward(self, g, x): for exe in self.exes: if exe.startswith("d"): if g is not None: x = getattr(self, exe)(g, x) else: x = getattr(self, exe)(x) else: x = getattr(self, exe)(x) return x class Sequential(torch.nn.Module): """Sequential neural network with input layers. Parameters ---------- layer : torch.nn.Module DGL graph convolution layers. config : List A sequence of numbers (for units) and strings (for activation functions) denoting the configuration of the sequential model. feature_units : int(default=114) The number of input channels. Methods ------- forward(g, x) Forward pass. """ def __init__( self, layer, config, feature_units=114, input_units=128, model_kwargs={}, ): super(Sequential, self).__init__() # initial featurization self.f_in = torch.nn.Sequential( torch.nn.Linear(feature_units, input_units), torch.nn.Tanh() ) self._sequential = _Sequential( layer, config, in_features=input_units, model_kwargs=model_kwargs ) def _forward(self, g, x): """Forward pass with graph and features.""" for exe in self.exes: if exe.startswith("d"): x = getattr(self, exe)(g, x) else: x = getattr(self, exe)(x) return x def forward(self, g, x=None): """Forward pass. 
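Message passing runs on the homogeneous atom-level subgraph (``n1_neighbors_n1`` edges): input features ``h0`` are projected by ``f_in``, propagated through the graph convolution stack, and the result is written back to ``g.nodes['n1'].data['h']``.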
Parameters ---------- g : `dgl.DGLHeteroGraph`, input graph Returns ------- g : `dgl.DGLHeteroGraph` output graph """ import dgl # get homogeneous subgraph g_ = dgl.to_homogeneous(g.edge_type_subgraph(["n1_neighbors_n1"])) if x is None: # get node attributes x = g.nodes["n1"].data["h0"] x = self.f_in(x) # message passing on homo graph x = self._sequential(g_, x) # put attribute back in the graph g.nodes["n1"].data["h"] = x return g ================================================ FILE: espaloma/nn/tests/test_baseline.py ================================================ import pytest @pytest.fixture def baseline(): import espaloma as esp g = esp.Graph("c1ccccc1") # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ) # param / typing operation = forcefield.parametrize operation(g) baseline = esp.nn.baselines.FreeParameterBaseline(g_ref=g.heterograph) return baseline def test_init(baseline): baseline def test_parameter(baseline): print(list(baseline.parameters())) assert len(list(baseline.parameters())) > 0 ================================================ FILE: espaloma/nn/tests/test_janossy.py ================================================ import pytest def test_small_net(): import torch import espaloma as esp # define a layer layer = esp.nn.layers.dgl_legacy.gn("GraphConv") # define a representation representation = esp.nn.Sequential( layer, [32, "tanh", 32, "tanh", 32, "tanh"] ) # define a readout readout = esp.nn.readout.janossy.JanossyPooling( config=[32, "tanh"], in_features=32 ) net = torch.nn.Sequential(representation, readout) g = esp.Graph("c1ccccc1") ================================================ FILE: espaloma/nn/tests/test_simple_net.py ================================================ import pytest def test_small_net(): import torch import espaloma as esp layer = esp.nn.dgl_legacy.gn() net = esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]) ================================================ FILE: espaloma/units.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= from openmm import unit # ============================================================================= # CONSTANTS # ============================================================================= # scaled units PARTICLE = unit.mole.create_unit( 6.02214076e23**-1, "particle", "particle", ) HARTREE_PER_PARTICLE = unit.hartree / PARTICLE # basic units DISTANCE_UNIT = unit.bohr ENERGY_UNIT = HARTREE_PER_PARTICLE FORCE_UNIT = ENERGY_UNIT / DISTANCE_UNIT ANGLE_UNIT = unit.radian CHARGE_UNIT = unit.elementary_charge # compose units FORCE_CONSTANT_UNIT = ENERGY_UNIT / (DISTANCE_UNIT**2) ANGLE_FORCE_CONSTANT_UNIT = ENERGY_UNIT / (ANGLE_UNIT**2) COULOMB_CONSTANT_UNIT = ( ENERGY_UNIT * DISTANCE_UNIT / ((unit.elementary_charge**2)) ) GAS_CONSTANT = ( 8.31446261815324 * unit.joule * (unit.kelvin**-1) * (unit.mole**-1) ).value_in_unit(HARTREE_PER_PARTICLE / unit.kelvin) ================================================ FILE: espaloma/utils/geometry.py ================================================ import numpy as np def _sample_unit_circle(n_samples: int = 1) -> np.ndarray: """ >>> np.isclose(np.linalg.norm(_sample_unit_circle(1)), 1) True """ theta = np.random.rand(n_samples) * 2 * np.pi x = np.cos(theta) y = np.sin(theta) xy = np.array([x, y]).T assert xy.shape == (n_samples, 2) return xy def 
_sample_four_particle_torsion_scan(n_samples: int = 1) -> np.ndarray: """Generate n_samples random configurations of a 4-particle system abcd where * distances ab, bc, cd are constant, * angles abc, bcd are constant * dihedral angle abcd is uniformly distributed in [0, 2pi] Returns ------- xyz : np.ndarray, shape = (n_samples, 4, 3) Notes ----- * Positions of a,b,c are constant, and x-coordinate of d is constant. To be more exacting, could add random displacements and rotations. """ a = (-3, -1, 0) b = (-2, 0, 0) c = (-1, 0, 0) d = (0, 1, 0) # form one 3D configuration conf = np.array([a, b, c, d]) assert conf.shape == (4, 3) # make n_samples copies xyz = np.array([conf] * n_samples, dtype=float) assert xyz.shape == (n_samples, 4, 3) # assign y and z coordinates of particle d to unit-circle samples xyz[:, 3, 1:] = _sample_unit_circle(n_samples) return xyz def _timemachine_signed_torsion_angle(ci, cj, ck, cl): """Reference implementation from Yutong Zhao's timemachine Copied directly from https://github.com/proteneer/timemachine/blob/1a0ab45e605dc1e28c44ea90f38cb0dedce5c4db/timemachine/potentials/bonded.py#L152-L199 (but with 3 lines of dead code removed, and delta_r inlined) """ rij = cj - ci rkj = cj - ck rkl = cl - ck n1 = np.cross(rij, rkj) n2 = np.cross(rkj, rkl) y = np.sum( np.multiply( np.cross(n1, n2), rkj / np.linalg.norm(rkj, axis=-1, keepdims=True), ), axis=-1, ) x = np.sum(np.multiply(n1, n2), -1) return np.arctan2(y, x) ================================================ FILE: espaloma/utils/model_fetch.py ================================================ from pathlib import Path from typing import Any, Union import requests import torch.utils.model_zoo from tqdm import tqdm def _get_model_url(version: str) -> str: """ Get the URL of the espaloma model from GitHub releases. Parameters: version (str): Version of the model. If set to "latest", the URL for the latest version will be returned. Returns: str: The URL of the espaloma model. Note: - If version is set to "latest", the URL for the latest version of the model will be returned. - The URL is obtained from the GitHub releases of the espaloma repository. Example: >>> url = _get_model_url(version="0.3.0") """ if version == "latest": url = "https://github.com/choderalab/espaloma/releases/latest/download/espaloma-latest.pt" else: # TODO: This scheme requires the version string of the model to match the # release version url = f"https://github.com/choderalab/espaloma/releases/download/{version}/espaloma-{version}.pt" return url def get_model_path( model_dir: Union[str, Path] = ".espaloma/", version: str = "latest", disable_progress_bar: bool = False, overwrite: bool = False, ) -> Path: """ Download a model for espaloma. Parameters: model_dir (str or Path): Directory path where the model will be saved. Default is ``.espaloma/``. version (str): Version of the model to download. Default is "latest". disable_progress_bar (bool): Whether to disable the progress bar during the download. Default is False. overwrite (bool): Whether to overwrite the existing model file if it exists. Default is False. Returns: Path: The path to the downloaded model file. Raises: FileExistsError: If the model file already exists and overwrite is set to False. Note: - If version is set to "latest", the latest version of the model will be downloaded. - The model will be downloaded from GitHub releases. - The model file will be saved in the specified model directory. 
Example: >>> model_path = get_model_path(model_dir=".espaloma/", version="0.3.0", disable_progress_bar=True) """ url = _get_model_url(version) # This will work as long as we never have a "/" in the version string file_name = Path(url.split("/")[-1]) model_dir = Path(model_dir) model_path = Path(model_dir / file_name) if not overwrite and model_path.exists(): raise FileExistsError( f"File '{model_path}' exists, use overwrite=True to overwrite file" ) model_dir.mkdir(parents=True, exist_ok=True) request = requests.get(url, stream=True) request_length = int(request.headers.get("content-length", 0)) with open(model_path, "wb") as file, tqdm( total=request_length, unit="iB", unit_scale=True, unit_divisor=1024, disable=disable_progress_bar, ) as progress: for data in request.iter_content(chunk_size=1024): size = file.write(data) progress.update(size) return model_path def get_model(version: str = "latest") -> dict[str, Any]: """ Load an espaloma model from GitHub releases. Parameters: version (str): Version of the model to load. Default is "latest". Returns: dict[str, Any]: The loaded espaloma model. Note: - If version is set to "latest", the latest version of the model will be loaded. - The model will be loaded from GitHub releases. - The model will be loaded onto the CPU. Example: >>> model = get_model(version="0.3.0") """ url = _get_model_url(version) model = torch.utils.model_zoo.load_url(url, map_location="cpu") model.eval() # type: ignore return model ================================================ FILE: espaloma/utils/tests/test_model_fetch.py ================================================ import espaloma as esp import torch from openff.toolkit.topology import Molecule def test_get_model_path(tmp_path): model_dir = tmp_path / "latest" model_path = esp.get_model_path(model_dir=model_dir, disable_progress_bar=True) molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") molecule_graph = esp.Graph(molecule) espaloma_model = torch.load(model_path) espaloma_model.eval() espaloma_model(molecule_graph.heterograph) def test_get_model(tmp_path): espaloma_model = esp.get_model() molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") molecule_graph = esp.Graph(molecule) espaloma_model(molecule_graph.heterograph) ================================================ FILE: requirements.txt ================================================ dgl torch matplotlib pandas numpy qcportal ================================================ FILE: scripts/README.md ================================================ # Miscellaneous auxiliary scripts for demonstrating espaloma * `perses-benchmark/` - relative alchemical free energy calculations with [perses](http://github.com/choderalab/perses) using espaloma to parameterize small molecules via [`openmmforcefields`](https://github.com/openmm/openmmforcefields) ================================================ FILE: scripts/perses-benchmark/README.md ================================================ # Relative alchemical free energy calculations This is an example of using [perses](http://github.com/choderalab/perses) with espaloma to parameterize small molecules via [`openmmforcefields`](https://github.com/openmm/openmmforcefields) * `tyk2/` - JACS tyk2 system ## Installing perses and espaloma To install perses and espaloma together: ```bash conda env create -n espaloma-perses -f espaloma-perses.yaml ``` To reproduce the environment used in the paper (on linux-64): ```bash conda env create -n espaloma-perses -f espaloma-perses.export.yaml ```
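To sanity-check the installation (a minimal sketch, assuming the environment was created under the name `espaloma-perses` as above):
```bash
conda activate espaloma-perses
python -c "import espaloma, perses"
```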
================================================ FILE: scripts/perses-benchmark/espaloma-perses.export.yaml ================================================ name: espaloma-perses channels: - dglteam - psi4 - conda-forge - openeye - defaults dependencies: - _libgcc_mutex=0.1=conda_forge - _openmp_mutex=4.5=1_gnu - alabaster=0.7.12=py_0 - ambertools=21.9=py39h69e27f8_0 - argon2-cffi=21.3.0=pyhd8ed1ab_0 - argon2-cffi-bindings=21.2.0=py39h3811e60_1 - arpack=3.7.0=hdefa2d7_2 - arrow-cpp=2.0.0=py39h5894ca3_15_cpu - arsenic=0.2.1=py39hf3d152e_0 - asttokens=2.0.5=pyhd8ed1ab_0 - astunparse=1.6.3=pyhd8ed1ab_0 - attrs=21.4.0=pyhd8ed1ab_0 - aws-c-common=0.4.59=h36c2ea0_1 - aws-c-event-stream=0.1.6=had2084c_6 - aws-checksums=0.1.10=h4e93380_0 - aws-sdk-cpp=1.8.70=h57dc084_1 - babel=2.9.1=pyh44b312d_0 - backcall=0.2.0=pyh9f0ad1d_0 - backports=1.1=pyhd3eb1b0_0 - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 - beautifulsoup4=4.10.0=pyha770c72_0 - blas=1.0=mkl - bleach=5.0.0=pyhd8ed1ab_0 - blosc=1.21.0=h9c3ff4c_0 - bokeh=2.4.2=py39hf3d152e_0 - boost=1.74.0=py39h5472131_5 - boost-cpp=1.74.0=hc6e9bd1_3 - brotli=1.0.9=h166bdaf_7 - brotli-bin=1.0.9=h166bdaf_7 - brotlipy=0.7.0=py39hb9d737c_1004 - bzip2=1.0.8=h7f98852_4 - c-ares=1.18.1=h7f98852_0 - ca-certificates=2022.3.29=h06a4308_0 - cached-property=1.5.2=hd8ed1ab_1 - cached_property=1.5.2=pyha770c72_1 - cairo=1.16.0=h6cf1ce9_1008 - certifi=2021.10.8=py39hf3d152e_2 - cffi=1.15.0=py39h4bc2ebd_0 - cftime=1.6.0=py39hd257fcd_0 - charset-normalizer=2.0.12=pyhd8ed1ab_0 - click=8.1.2=py39hf3d152e_0 - cloudpickle=2.0.0=pyhd8ed1ab_0 - codecov=2.1.11=pyhd3deb0d_0 - colorama=0.4.4=pyh9f0ad1d_0 - coverage=6.3.2=py39hb9d737c_2 - cryptography=36.0.0=py39h9ce1e76_0 - cudatoolkit=10.2.89=h8f6ccaa_10 - curl=7.82.0=h2283fc2_0 - cycler=0.11.0=pyhd8ed1ab_0 - cython=0.29.28=py39h5a03fae_2 - cytoolz=0.11.2=py39hb9d737c_2 - dask=2022.4.0=pyhd8ed1ab_0 - dask-core=2022.4.0=pyhd8ed1ab_0 - dask-jobqueue=0.7.3=pyhd8ed1ab_0 - debugpy=1.5.1=py39he80948d_0 - decorator=5.1.1=pyhd8ed1ab_0 - defusedxml=0.7.1=pyhd8ed1ab_0 - dgl=0.8.0post2=py39_0 - dicttoxml=1.7.4=pyhd8ed1ab_2 - distributed=2022.4.0=pyhd8ed1ab_0 - docutils=0.17.1=py39hf3d152e_1 - entrypoints=0.4=pyhd8ed1ab_0 - executing=0.8.3=pyhd8ed1ab_0 - expat=2.4.8=h27087fc_0 - fftw=3.3.10=nompi_h77c792f_102 - fire=0.4.0=pyh44b312d_0 - flit-core=3.7.1=pyhd8ed1ab_0 - fontconfig=2.14.0=h8e229c2_0 - freetype=2.11.0=h70c0345_0 - fsspec=2022.3.0=pyhd8ed1ab_0 - future=0.18.2=py39hf3d152e_5 - gettext=0.21.0=hf68c758_0 - gflags=2.2.2=he1b5a44_1004 - giflib=5.2.1=h516909a_2 - glog=0.4.0=h49b9bf7_3 - greenlet=1.1.2=py39h5a03fae_2 - grpc-cpp=1.34.1=h2157cd5_4 - h5py=3.6.0=nompi_py39h7e08c79_100 - hdf4=4.2.15=h10796ff_3 - hdf5=1.12.1=nompi_h4df4325_104 - heapdict=1.0.1=py_0 - icu=68.2=h9c3ff4c_0 - idna=3.3=pyhd8ed1ab_0 - imagesize=1.3.0=pyhd8ed1ab_0 - importlib-metadata=4.11.3=py39hf3d152e_1 - importlib_resources=5.6.0=pyhd8ed1ab_0 - iniconfig=1.1.1=pyh9f0ad1d_0 - intel-openmp=2021.4.0=h06a4308_3561 - ipykernel=6.12.0=py39hef51801_0 - ipython=8.2.0=py39hf3d152e_0 - ipython_genutils=0.2.0=py_1 - ipywidgets=7.7.0=pyhd8ed1ab_0 - jedi=0.18.1=py39hf3d152e_1 - jinja2=3.1.1=pyhd8ed1ab_0 - joblib=1.1.0=pyhd8ed1ab_0 - jpeg=9e=h7f98852_0 - jsonschema=4.4.0=pyhd8ed1ab_0 - jupyter_client=7.2.2=pyhd8ed1ab_1 - jupyter_core=4.9.2=py39hf3d152e_0 - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 - jupyterlab_widgets=1.1.0=pyhd8ed1ab_0 - keyutils=1.6.1=h166bdaf_0 - kiwisolver=1.4.2=py39hf939315_1 - krb5=1.19.3=h08a2579_0 - lcms2=2.12=hddcbb42_0 - 
ld_impl_linux-64=2.36.1=hea4e1c9_2 - libblas=3.9.0=12_linux64_mkl - libbrotlicommon=1.0.9=h166bdaf_7 - libbrotlidec=1.0.9=h166bdaf_7 - libbrotlienc=1.0.9=h166bdaf_7 - libcblas=3.9.0=12_linux64_mkl - libcurl=7.82.0=h2283fc2_0 - libedit=3.1.20210910=h7f8727e_0 - libev=4.33=h516909a_1 - libevent=2.1.10=h28343ad_4 - libffi=3.4.2=h7f98852_5 - libgcc-ng=11.2.0=h1d223b6_15 - libgfortran-ng=11.2.0=h69a702a_15 - libgfortran5=11.2.0=h5c6108e_15 - libglib=2.70.2=h174f98d_4 - libgomp=11.2.0=h1d223b6_15 - libiconv=1.16=h516909a_0 - liblapack=3.9.0=12_linux64_mkl - libllvm10=10.0.1=he513fc3_3 - libnetcdf=4.8.1=nompi_hb3fd0d9_101 - libnghttp2=1.47.0=he49606f_0 - libnsl=2.0.0=h7f98852_0 - libpng=1.6.37=hed695b0_2 - libprotobuf=3.14.0=h780b84a_0 - libsodium=1.0.18=h516909a_1 - libssh2=1.10.0=ha35d2d1_2 - libstdcxx-ng=11.2.0=he4da1e4_15 - libthrift=0.13.0=hfb8234f_6 - libtiff=4.2.0=hbd63e13_2 - libutf8proc=2.7.0=h7f98852_0 - libuuid=2.32.1=h14c3975_1000 - libwebp=1.2.2=h55f646e_0 - libwebp-base=1.2.2=h7f98852_1 - libxcb=1.14=h7b6447c_0 - libxml2=2.9.12=h72842e0_0 - libxslt=1.1.33=h15afd5d_2 - libzip=1.8.0=h1c5bbd1_1 - libzlib=1.2.11=h166bdaf_1014 - llvmlite=0.36.0=py39h1bbdace_0 - locket=0.2.1=py39h06a4308_2 - lxml=4.8.0=py39hb9d737c_1 - lz4=4.0.0=py39h029007f_1 - lz4-c=1.9.3=h9c3ff4c_1 - lzo=2.10=h516909a_1000 - markupsafe=2.1.1=py39hb9d737c_1 - matplotlib=3.3.2=0 - matplotlib-base=3.3.2=py39h98787fa_1 - matplotlib-inline=0.1.3=pyhd8ed1ab_0 - mdtraj=1.9.7=py39h138c130_1 - mistune=0.8.4=py39h3811e60_1005 - mkl=2021.4.0=h06a4308_640 - mkl-service=2.4.0=py39h3811e60_0 - mpiplus=v0.0.1=py39hde42818_1002 - msgpack-python=1.0.3=py39hf939315_1 - nbclient=0.5.13=pyhd8ed1ab_0 - nbconvert=6.4.5=py39hf3d152e_0 - nbformat=5.3.0=pyhd8ed1ab_0 - ncurses=6.3=h27087fc_1 - nest-asyncio=1.5.5=pyhd8ed1ab_0 - netcdf-fortran=4.5.4=nompi_h2b6e579_100 - netcdf4=1.5.8=nompi_py39h64b754b_101 - networkx=2.7.1=pyhd8ed1ab_0 - nglview=3.0.3=pyh8a188c0_0 - ninja=1.10.2=h4bd325d_1 - nose=1.3.7=py_1006 - nose-timer=1.0.1=pyhd8ed1ab_0 - notebook=6.4.10=pyha770c72_0 - numba=0.53.1=py39h56b8d98_1 - numexpr=2.8.1=py39h6abb31d_0 - numpy=1.22.3=py39h18676bf_1 - numpydoc=1.2.1=pyhd8ed1ab_0 - ocl-icd=2.3.1=h7f98852_0 - ocl-icd-system=1.0.0=1 - openeye-toolkits=2021.2.0=py39_0 - openff-forcefields=2.0.0=pyh6c4a22f_0 - openff-toolkit=0.10.3=pyhd8ed1ab_0 - openff-toolkit-base=0.10.3=pyhd8ed1ab_0 - openmm=7.7.0=py39h9717219_1 - openmmtools=0.21.2=pyhd8ed1ab_0 - openmoltools=0.8.8=pyhd8ed1ab_1 - openssl=3.0.2=h166bdaf_1 - orc=1.6.6=h7950760_1 - packaging=21.3=pyhd8ed1ab_0 - packmol=20.010=h86c2bf4_0 - pandas=1.4.2=py39h1832856_0 - pandoc=2.17.1.1=ha770c72_0 - pandocfilters=1.5.0=pyhd8ed1ab_0 - parmed=3.4.3=py39he80948d_1 - parquet-cpp=1.5.1=1 - parso=0.8.3=pyhd8ed1ab_0 - partd=1.2.0=pyhd8ed1ab_0 - patsy=0.5.2=pyhd8ed1ab_0 - pcre=8.45=h9c3ff4c_0 - pdbfixer=1.8.1=pyh6c4a22f_0 - perl=5.32.1=2_h7f98852_perl5 - perses=0.9.5=pyh8a188c0_0 - pexpect=4.8.0=pyh9f0ad1d_2 - pickleshare=0.7.5=py39hde42818_1002 - pillow=9.0.1=py39h22f2fdc_0 - pint=0.19.1=pyhd8ed1ab_0 - pip=22.0.4=pyhd8ed1ab_0 - pixman=0.40.0=h36c2ea0_0 - plotly=5.7.0=pyhd8ed1ab_0 - pluggy=1.0.0=py39hf3d152e_3 - prometheus_client=0.14.0=pyhd8ed1ab_0 - prompt-toolkit=3.0.29=pyha770c72_0 - psutil=5.9.0=py39hb9d737c_1 - ptyprocess=0.7.0=pyhd3deb0d_0 - pure_eval=0.2.2=pyhd8ed1ab_0 - py=1.11.0=pyh6c4a22f_0 - pyarrow=2.0.0=py39h3ebc44c_15_cpu - pycairo=1.21.0=py39h0934665_1 - pycparser=2.21=pyhd8ed1ab_0 - pydantic=1.9.0=py39hb9d737c_1 - pygments=2.11.2=pyhd8ed1ab_0 - pymbar=3.0.6=py39hd257fcd_0 - 
pyopenssl=22.0.0=pyhd8ed1ab_0 - pyparsing=3.0.7=pyhd8ed1ab_0 - pyrsistent=0.18.1=py39hb9d737c_1 - pysocks=1.7.1=py39hf3d152e_5 - pytables=3.7.0=py39h2669a42_0 - pytest=7.1.1=py39hf3d152e_1 - pytest-cov=3.0.0=pyhd8ed1ab_0 - python=3.9.12=h2660328_1_cpython - python-dateutil=2.8.2=pyhd8ed1ab_0 - python-fastjsonschema=2.15.3=pyhd8ed1ab_0 - python_abi=3.9=2_cp39 - pytorch=1.10.2=cpu_py39hfa7516b_0 - pytz=2022.1=pyhd8ed1ab_0 - pyyaml=6.0=py39hb9d737c_4 - pyzmq=22.3.0=py39headdf64_2 - qcelemental=0.24.0=pyhd8ed1ab_0 - qcportal=0.15.8=pyhd8ed1ab_0 - rdkit=2022.03.1=py39h89e00b9_0 - re2=2020.11.01=h58526e2_0 - readline=8.1.2=h7f8727e_1 - reportlab=3.5.68=py39he59360d_1 - requests=2.27.1=pyhd8ed1ab_0 - scikit-learn=1.0.2=py39h4dfa638_0 - scipy=1.8.0=py39hee8e79c_1 - seaborn=0.11.2=hd8ed1ab_0 - seaborn-base=0.11.2=pyhd8ed1ab_0 - send2trash=1.8.0=pyhd8ed1ab_0 - setuptools=62.0.0=py39hf3d152e_0 - six=1.16.0=pyh6c4a22f_0 - smirnoff99frosst=1.1.0=pyh44b312d_0 - snappy=1.1.8=he1b5a44_3 - snowballstemmer=2.2.0=pyhd8ed1ab_0 - sortedcontainers=2.4.0=pyhd8ed1ab_0 - soupsieve=2.3.1=pyhd8ed1ab_0 - sphinx=4.5.0=pyh6c4a22f_0 - sphinx_rtd_theme=1.0.0=pyhd8ed1ab_0 - sphinxcontrib-applehelp=1.0.2=py_0 - sphinxcontrib-devhelp=1.0.2=py_0 - sphinxcontrib-htmlhelp=2.0.0=pyhd8ed1ab_0 - sphinxcontrib-jsmath=1.0.1=py_0 - sphinxcontrib-qthelp=1.0.3=py_0 - sphinxcontrib-serializinghtml=1.1.5=pyhd8ed1ab_1 - sqlalchemy=1.4.35=py39hb9d737c_0 - sqlite=3.38.2=hc218d9a_0 - stack_data=0.2.0=pyhd8ed1ab_0 - statsmodels=0.13.2=py39hce5d2b2_0 - tblib=1.7.0=pyhd8ed1ab_0 - tenacity=8.0.1=pyhd8ed1ab_0 - termcolor=1.1.0=py_2 - terminado=0.13.3=py39hf3d152e_1 - testpath=0.6.0=pyhd8ed1ab_0 - threadpoolctl=3.1.0=pyh8a188c0_0 - tinydb=4.7.0=pyhd8ed1ab_0 - tk=8.6.12=h27826a3_0 - toml=0.10.2=pyhd8ed1ab_0 - tomli=2.0.1=pyhd8ed1ab_0 - toolz=0.11.2=pyhd8ed1ab_0 - tornado=6.1=py39hb9d737c_3 - tqdm=4.64.0=pyhd8ed1ab_0 - traitlets=5.1.1=pyhd8ed1ab_0 - typing-extensions=4.1.1=hd8ed1ab_0 - typing_extensions=4.1.1=pyha770c72_0 - tzdata=2022a=h191b570_0 - urllib3=1.26.9=pyhd8ed1ab_0 - validators=0.18.2=pyhd3deb0d_0 - wcwidth=0.2.5=pyh9f0ad1d_2 - webencodings=0.5.1=py_1 - wheel=0.37.1=pyhd8ed1ab_0 - widgetsnbextension=3.6.0=py39hf3d152e_0 - xmltodict=0.12.0=py_0 - xorg-kbproto=1.0.7=h14c3975_1002 - xorg-libice=1.0.10=h516909a_0 - xorg-libsm=1.2.3=hd9c2040_1000 - xorg-libx11=1.7.2=h7f98852_0 - xorg-libxext=1.3.4=h7f98852_1 - xorg-libxrender=0.9.10=h7f98852_1003 - xorg-libxt=1.2.1=h7f98852_2 - xorg-renderproto=0.11.1=h14c3975_1002 - xorg-xextproto=7.3.0=h14c3975_1002 - xorg-xproto=7.0.31=h14c3975_1007 - xz=5.2.5=h516909a_1 - yaml=0.2.5=h7f98852_2 - zeromq=4.3.4=h9c3ff4c_1 - zict=2.1.0=pyhd8ed1ab_0 - zipp=3.8.0=pyhd8ed1ab_0 - zlib=1.2.11=h166bdaf_1014 - zstd=1.4.9=ha95c52a_0 - pip: - amberlite==16.0 - amberutils==21.0 - espaloma==0.2.2 - mmpbsa-py==16.0 - openmmforcefields==0.10.0+27.g1fabf43 - packmol-memgen==1.2.1rc0 - pdb4amber==20.1 - pytraj==2.0.6 - sander==16.0 prefix: /lila/home/chodera/miniconda/envs/espaloma-perses ================================================ FILE: scripts/perses-benchmark/espaloma-perses.yaml ================================================ name: espaloma-perses channels: - conda-forge - dglteam - openeye - defaults - anaconda dependencies: # Base dependencies - python - pip # 3rd party - openeye-toolkits - numpy - matplotlib - scipy - openff-toolkit - openff-forcefields - smirnoff99Frosst - openmm - openmmforcefields - tqdm # Pytorch - pytorch>=1.8.0 - dgl # Testing - pytest - pytest-cov - codecov - nose - nose-timer - 
coverage - qcportal>=0.15.0 - sphinx - sphinx_rtd_theme # perses - perses # will be added to openmmforcefields conda-forge recipe - validators - pip: # espaloma - git+https://github.com/choderalab/espaloma.git@0.2.2 # openmmforcefield - git+https://github.com/openmm/openmmforcefields.git ================================================ FILE: scripts/perses-benchmark/tyk2/README.md ================================================ # tyk2 benchmarks with perses and espaloma * `openff-1.2.0/` - scripts to use the Open Force Field ("Parsley") `openff-1.2.0` small molecule force field * `espaloma-0.2.2/` - scripts to use the Espaloma `espaloma-0.2.2` small molecule force field ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/LSF-job-template.sh ================================================ #!/bin/bash #BSUB -P "tyk2-benchmark" #BSUB -J "perses-benchmark-[1-24]" #BSUB -n 1 #BSUB -R rusage[mem=8] #BSUB -R span[hosts=1] #BSUB -q gpuqueue #BSUB -sp 1 # low priority. default is 12, max is 25 #BSUB -gpu num=1:j_exclusive=yes:mode=shared #BSUB -W 24:00 #BSUB -o out_%J_%I.stdout #BSUB -eo out_%J_%I.stderr #BSUB -L /bin/bash source ~/.bashrc OPENMM_CPU_THREADS=1 echo "changing directory to ${LS_SUBCWD}" cd $LS_SUBCWD conda activate espaloma-perses # Report node in use hostname # Report CUDA info env | sort | grep 'CUDA' # launching a benchmark pair (target, edge) per job (0-based, thus subtract 1) python run_benchmarks.py --target tyk2 --edge $(( $LSB_JOBINDEX - 1 )) ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/README.md ================================================ # Perses benchmarks This subdirectory exposes a CLI tool for running automated benchmarks from [OpenFF's protein ligand benchmark dataset](https://github.com/openforcefield/protein-ligand-benchmark) using perses. ## Running all edges A script to run all transformations in an LSF batch scheduler is provided, but it will likely need to be modified for your batch queue system: ```bash bsub < LSF-job-template.sh ``` ## Running single edges Assuming you have a clone of the perses code repository and are standing in the `benchmarks` subdirectory (where this file lives), the benchmarks can be run using the following command syntax: ```bash python run_benchmarks.py --target [protein-name] --edge [edge-index] ``` For example, to run the seventh edge (zero-based, according to the [plbenchmark data](https://github.com/openforcefield/protein-ligand-benchmark)) for the `tyk2` protein, you would run: ```bash # Set up and run edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` Should the calculation for an edge fail, you can simply re-run the same command line and the calculation will resume: ```bash # Resume failed edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` For more information on how to use the tool, you can run `python run_benchmarks.py -h`. ## Analyzing benchmarks To analyze the simulations, a script called `benchmark_analysis.py` is used as follows: ```bash python benchmark_analysis.py --target [protein-name] ``` For example, for tyk2 results: ```bash python benchmark_analysis.py --target tyk2 ``` This will generate an output CSV file for [`arsenic`](https://github.com/openforcefield/arsenic) and corresponding absolute and relative free energy plots as PDF files, produced according to best practices. For more information on how to use the CLI analysis tool, run `python benchmark_analysis.py -h`.
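For reference, the experimental ΔG values written to the arsenic CSV are derived from the Ki measurements in the benchmark's `ligands.yml`. A minimal sketch of the conversion `benchmark_analysis.py` applies (the 0.096 uM Ki below is illustrative, taken from the `lig_ejm_31` example entry in the script's docstring):

```python
# Convert an experimental Ki to a binding free energy in kcal/mol,
# mirroring the conversion in benchmark_analysis.py.
import numpy as np
from openmm import unit
from openmmtools.constants import kB

kBT = kB * 300 * unit.kelvin   # thermal energy at 300 K
ki_in_molar = 0.096 * 1e-6     # 0.096 uM expressed in molar units
expt_DG = kBT.value_in_unit(unit.kilocalorie_per_mole) * np.log(ki_in_molar)
print(f"expt_DG = {expt_DG:.2f} kcal/mol")  # approximately -9.6 kcal/mol
```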
================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/benchmark_analysis.py ================================================ """ Script to perform analysis of perses simulations executed using the run_benchmarks.py script. Intended to be used on systems from https://github.com/openforcefield/protein-ligand-benchmark """ import argparse import glob import itertools import re import warnings import numpy as np import urllib.request import yaml from openmmtools.constants import kB from perses.analysis.load_simulations import Simulation from openmm import unit from openff.arsenic import plotting, wrangle # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" # Helper functions def get_simdir_list(base_dir='.', is_reversed=False): """ Get list of directories to extract simulation data from. Parameters ---------- base_dir: str, optional, default='.' Base directory where to search for simulation results. Defaults to the current directory. is_reversed: bool, optional, default=False Whether to consider the reversed simulations or not. Meant for testing purposes. Returns ------- dir_list: list List of directory paths for simulation results. """ # Load all expected simulations from directories out_dirs = ['/'.join(filepath.split('/')[:-1]) for filepath in glob.glob(f'{base_dir}/out*/*complex.nc')] reg = re.compile(r'out_[0-9]+_[0-9]+_reversed') # regular expression to deal with reversed directories if is_reversed: # Choose only reversed directories out_dirs = list(filter(reg.search, out_dirs)) else: # Filter out reversed directories out_dirs = list(itertools.filterfalse(reg.search, out_dirs)) return out_dirs def get_simulations_data(simulation_dirs): """Generates a list of simulation data objects given the simulation directory paths.""" simulations = [] for out_dir in simulation_dirs: # Load complete or fully working simulations # TODO: Try getting better exceptions from openmmtools -- use non-generic exceptions try: simulation = Simulation(out_dir) simulations.append(simulation) except Exception: warnings.warn(f"Edge in {out_dir} could not be loaded. Check simulation output is complete.") return simulations def to_arsenic_csv(experimental_data: dict, simulation_data: list, out_csv: str = 'out_benchmark.csv'): """ Generates a csv file to be used with openff-arsenic. Energy units are in kcal/mol. .. warning:: To be deprecated once the arsenic object model is improved. Parameters ---------- experimental_data: dict Python nested dictionary with experimental data in micromolar or nanomolar units. Example of entry: {'lig_ejm_31': {'measurement': {'comment': 'Table 4, entry 31', 'doi': '10.1016/j.ejmech.2013.03.070', 'error': -1, 'type': 'ki', 'unit': 'uM', 'value': 0.096}, 'name': 'lig_ejm_31', 'smiles': '[H]c1c(c(c(c(c1[H])Cl)C(=O)N([H])c2c(c(nc(c2[H])N([H])C(=O)C([H])([H])[H])[H])[H])Cl)[H]'} simulation_data: list or iterable Python iterable object with perses Simulation objects as entries. out_csv: str Path to output csv file to be generated.
""" # Ligand information ligands_names = list(ligands_dict.keys()) lig_id_to_name = dict(enumerate(ligands_names)) kBT = kB * 300 * unit.kelvin # useful when converting to kcal/mol # Write csv file with open(out_csv, 'w') as csv_file: # Experimental block # print header for block csv_file.write("# Experimental block\n") csv_file.write("# Ligand, expt_DG, expt_dDG\n") # Extract ligand name, expt_DG and expt_dDG from ligands dictionary for ligand_name, ligand_data in experimental_data.items(): # TODO: Handle multiple measurement types unit_symbol = ligand_data['measurement']['unit'] measurement_value = ligand_data['measurement']['value'] measurement_error = ligand_data['measurement']['error'] # Unit conversion # TODO: Let's persuade PLBenchmarks to use pint units unit_conversions = { 'M' : 1.0, 'mM' : 1e-3, 'uM' : 1e-6, 'nM' : 1e-9, 'pM' : 1e-12, 'fM' : 1e-15 } if unit_symbol not in unit_conversions: raise ValueError(f'Unknown units "{unit_symbol}"') value_to_molar= unit_conversions[unit_symbol] # Handle unknown errors # TODO: We should be able to ensure that all entries have more reasonable errors. if measurement_error == -1: # TODO: For now, we use a relative_error from the Tyk2 system 10.1016/j.ejmech.2013.03.070 relative_error = 0.3 else: relative_error = measurement_error / measurement_value # Convert to free eneriges expt_DG = kBT.value_in_unit(unit.kilocalorie_per_mole) * np.log(measurement_value * value_to_molar) expt_dDG = kBT.value_in_unit(unit.kilocalorie_per_mole) * relative_error csv_file.write(f"{ligand_name}, {expt_DG}, {expt_dDG}\n") # Calculated block # print header for block csv_file.write("# Calculated block\n") csv_file.write("# Ligand1,Ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional)\n") # Loop through simulation, extract ligand1 and ligand2 indices, convert to names, create string with # ligand1, ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional) # write string in csv file for simulation in simulation_data: out_dir = simulation.directory.split('/')[-1] # getting integer indices ligand1_id, ligand2_id = int(out_dir.split('_')[-1]), int(out_dir.split('_')[-2]) # CHECK ORDER! # getting names of ligands ligand1, ligand2 = lig_id_to_name[ligand1_id], lig_id_to_name[ligand2_id] # getting calc_DDG in kcal/mol calc_DDG = simulation.bindingdg.value_in_unit(unit.kilocalorie_per_mole) # getting calc_dDDG in kcal/mol calc_dDDG = simulation.bindingddg.value_in_unit(unit.kilocalorie_per_mole) csv_file.write( f"{ligand1}, {ligand2}, {calc_DDG}, {calc_dDDG}, 0.0\n") # hardcoding additional error as 0.0 # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with urllib.request.urlopen(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks analysis.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Analyze reversed edge simulations. Helpful for testing/consistency checks." 
) args = arg_parser.parse_args() target = args.target # Download experimental data # TODO: This part should be done using plbenchmarks API - once there is a conda pkg # TODO: Let's cache this data when we set up the initial simulations in case it changes in between setting up and running the calculations and analysis. # TODO: Let's also be sure to use a specific release tag rather than 'master' target_dir = targets_dict[target]['dir'] ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with urllib.request.urlopen(ligands_url) as response: yaml_contents = response.read() print(yaml_contents) ligands_dict = yaml.safe_load(yaml_contents) # DEBUG print('') print(yaml.dump(ligands_dict)) # Get paths for simulation output directories out_dirs = get_simdir_list(is_reversed=args.reversed) # Generate list with simulation objects simulations = get_simulations_data(out_dirs) # Generate csv file csv_path = f'./{target}_arsenic.csv' to_arsenic_csv(ligands_dict, simulations, out_csv=csv_path) # TODO: Separate plotting into a different file # Make plots and store fe = wrangle.FEMap(csv_path) # Relative plot plotting.plot_DDGs(fe.graph, target_name=f'{target}', title=f'Relative binding energies - {target}', figsize=5, filename='./plot_relative.pdf' ) # Absolute plot, with experimental data shifted to correct mean experimental_mean_dg = np.asarray([node[1]["exp_DG"] for node in fe.graph.nodes(data=True)]).mean() plotting.plot_DGs(fe.graph, target_name=f'{target}', title=f'Absolute binding energies - {target}', figsize=5, filename='./plot_absolute.pdf', shift=experimental_mean_dg, ) ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/run_benchmarks.py ================================================ #!/usr/bin/env python """ CLI utility to automatically run benchmarks using data from the open force field protein-ligand benchmark at https://github.com/openforcefield/protein-ligand-benchmark It requires an internet connection to function properly, since it fetches data from the repository mentioned above. """ # TODO: Use plbenchmarks when conda package is available. import argparse import logging import os import yaml from perses.app.setup_relative_calculation import run from perses.utils.url_utils import retrieve_file_url from perses.utils.url_utils import fetch_url_contents # Setting logging level config LOGLEVEL = os.environ.get("LOGLEVEL", "DEBUG").upper() logging.basicConfig( format='%(asctime)s %(levelname)-8s %(message)s', level=LOGLEVEL, datefmt='%Y-%m-%d %H:%M:%S') _logger = logging.getLogger() _logger.setLevel(LOGLEVEL) # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" def concatenate_files(input_files, output_file): """ Concatenate the files given in the input_files iterator into output_file. """ with open(output_file, 'w') as outfile: for filename in input_files: with open(filename) as infile: for line in infile: outfile.write(line) def run_relative_perturbation(lig_a_idx, lig_b_idx, reverse=False, tidy=True): """ Perform relative free energy simulation using the perses CLI. Parameters ---------- lig_a_idx : int Index for first ligand (ligand A) lig_b_idx : int Index for second ligand (ligand B) reverse: bool Run the edge in the reverse direction. Swaps the ligands. tidy : bool, optional Remove auto-generated yaml files. Expects the target/protein pdb file in the same directory to be called 'target.pdb', and the ligands file to be called 'ligands.sdf'.
""" _logger.info(f'Starting relative calculation of ligand {lig_a_idx} to {lig_b_idx}') trajectory_directory = f'out_{lig_a_idx}_{lig_b_idx}' new_yaml = f'relative_{lig_a_idx}_{lig_b_idx}.yaml' # read base template yaml file # TODO: template.yaml file is configured for Tyk2, check if the same options work for others. with open(f'template.yaml', "r") as yaml_file: options = yaml.load(yaml_file, Loader=yaml.FullLoader) # TODO: add a step to perform some minimization - should help with NaNs # generate yaml file from template options['protein_pdb'] = 'target.pdb' options['ligand_file'] = 'ligands.sdf' if reverse: # Do the other direction of ligands options['old_ligand_index'] = lig_b_idx options['new_ligand_index'] = lig_a_idx # mark the output directory with reversed trajectory_directory = f'{trajectory_directory}_reversed' # mark new yaml file with reversed temp_path = new_yaml.split('.') new_yaml = f'{temp_path[0]}_reversed.{temp_path[1]}' else: options['old_ligand_index'] = lig_a_idx options['new_ligand_index'] = lig_b_idx options['trajectory_directory'] = f'{trajectory_directory}' with open(new_yaml, 'w') as outfile: yaml.dump(options, outfile) # run the simulation - using API point to respect logging level run(new_yaml) _logger.info(f'Relative calculation of ligand {lig_a_idx} to {lig_b_idx} complete') if tidy: os.remove(new_yaml) # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with fetch_url_contents(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--edge", type=int, help="Edge index (0-based) according to edges yaml file in dataset. Ex. --edge 5 (for sixth edge)", required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Whether to run the edge in reverse direction. Helpful for consistency checks." 
) args = arg_parser.parse_args() target = args.target is_reversed = args.reversed # Fetch protein pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg target_dir = targets_dict[target]['dir'] pdb_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/protein.pdb" pdb_file = retrieve_file_url(pdb_url) # Fetch cofactors crystalwater pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg cofactors_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/cofactors_crystalwater.pdb" cofactors_file = retrieve_file_url(cofactors_url) # Concatenate protein with cofactors pdbs concatenate_files((pdb_file, cofactors_file), 'target.pdb') # Fetch ligands sdf files and concatenate them into one # TODO: This part should be done using plbenchmarks API - once there is a conda pkg ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with fetch_url_contents(ligands_url) as response: ligands_dict = yaml.safe_load(response.read()) ligand_files = [] for ligand in ligands_dict.keys(): ligand_url = f"{base_repo_url}/raw/master/data/{target_dir}/02_ligands/{ligand}/crd/{ligand}.sdf" ligand_file = retrieve_file_url(ligand_url) ligand_files.append(ligand_file) # concatenate sdfs concatenate_files(ligand_files, 'ligands.sdf') # run simulation # fetch edges information # TODO: This part should be done using plbenchmarks API - once there is a conda pkg edges_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/edges.yml" with fetch_url_contents(edges_url) as response: edges_dict = yaml.safe_load(response.read()) edges_list = list(edges_dict.values()) # subscriptable edges object - note dicts are ordered for py>=3.7 # edge list to access by index edge_index = args.edge # read from cli arguments edge = edges_list[edge_index] ligand_a_name = edge['ligand_a'] ligand_b_name = edge['ligand_b'] # ligands list to get indices -- preserving same order as upstream yaml file ligands_list = list(ligands_dict.keys()) lig_a_index = ligands_list.index(ligand_a_name) lig_b_index = ligands_list.index(ligand_b_name) # Perform the simulation run_relative_perturbation(lig_a_index, lig_b_index, reverse=is_reversed) ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/template.yaml ================================================ # Path to protein file protein_pdb: null # Path to ligand SDF file ligand_file: null # Indices of old and new ligands within SDF file old_ligand_index: null new_ligand_index: null # # Force fields # # OpenMM ffxml force field files installed via the openmm-forcefields package # for biopolymers and solvents. # Note that small molecule force field files should NOT be included here. forcefield_files: - amber/ff14SB.xml # ff14SB protein force field - amber/tip3p_standard.xml # TIP3P and recommended monovalent ion parameters - amber/tip3p_HFE_multivalent.xml # for divalent ions - amber/phosaa10.xml # HANDLES THE TPO # Small molecule force field # Options include anything allowed by the openmmforcefields SystemGenerator # e.g. 
one of ['openff-2.0.0', 'gaff-2.11'] small_molecule_forcefield: espaloma-0.2.2 # # Simulation conditions # # Simulation setup options solvent_padding: 9.0 # angstroms # Use geometry-derived mapping use_given_geometries: true given_geometries_tolerance: 0.2 # angstroms # Atom mapping specification atom_expression: - IntType bond_expession: - DefaultBonds # Multi-state sampling scheme # One of ['repex', 'nonequilibrium', 'sams'] fe_type: repex # Checkpoint interval checkpoint_interval: 50 # number of iterations # Number of equilibration iterations n_equilibration_iterations: 0 # Number of iterations to run n_cycles: 5000 # Number of alchemical intermediate states to use n_states: 12 pressure: 1.0 # atmospheres temperature: 300.0 # kelvin timestep: 4.0 # femtoseconds # remove_constraints: false # Number of integration steps per iteration n_steps_per_move_application: 250 # Location for storing trajectories trajectory_directory: null # Prefix for trajectory files (project-specific name) trajectory_prefix: out # Atoms to store in NetCDF files (MDTraj selection syntax) atom_selection: not water # Calculation phases to run # Permitted phases: ['complex', 'solvent', 'vacuum'] phases: - complex - solvent ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/LSF-job-template.sh ================================================ #!/bin/bash #BSUB -P "tyk2-benchmark" #BSUB -J "perses-benchmark-[1-24]" #BSUB -n 1 #BSUB -R rusage[mem=8] #BSUB -R span[hosts=1] #BSUB -q gpuqueue #BSUB -sp 1 # low priority. default is 12, max is 25 #BSUB -gpu num=1:j_exclusive=yes:mode=shared #BSUB -W 24:00 #BSUB -o out_%J_%I.stdout #BSUB -eo out_%J_%I.stderr #BSUB -L /bin/bash source ~/.bashrc OPENMM_CPU_THREADS=1 echo "changing directory to ${LS_SUBCWD}" cd $LS_SUBCWD conda activate espaloma-perses # Report node in use hostname # Report CUDA info env | sort | grep 'CUDA' # launching a benchmark pair (target, edge) per job (0-based, thus subtract 1) python run_benchmarks.py --target tyk2 --edge $(( $LSB_JOBINDEX - 1 )) ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/README.md ================================================ # Perses benchmarks This subdirectory exposes a CLI tool for running automated benchmarks from [OpenFF's protein ligand benchmark dataset](https://github.com/openforcefield/protein-ligand-benchmark) using perses. ## Running all edges A script to run all transformations in an LSF batch scheduler is provided, but it will likely need to be modified for your batch queue system: ```bash bsub < LSF-job-template.sh ``` ## Running single edges Assuming you have a clone of the perses code repository and are standing in the `benchmarks` subdirectory (where this file lives), the benchmarks can be run using the following command syntax: ```bash python run_benchmarks.py --target [protein-name] --edge [edge-index] ``` For example, to run the seventh edge (zero-based, according to the [plbenchmark data](https://github.com/openforcefield/protein-ligand-benchmark)) for the `tyk2` protein, you would run: ```bash # Set up and run edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` Should the calculation for an edge fail, you can simply re-run the same command line and the calculation will resume: ```bash # Resume failed edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` For more information on how to use the tool, you can run `python run_benchmarks.py -h`.
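Each edge can also be run in the reverse direction (swapping which ligand is treated as "old" and which as "new"), which is helpful for consistency checks; as implemented in `run_benchmarks.py`, the `--reversed` flag writes results to a separate `out_*_reversed` directory, and those runs can later be analyzed with `benchmark_analysis.py --reversed`:

```bash
# Run edge 6 with the ligand order swapped (output goes to out_*_reversed)
python run_benchmarks.py --target tyk2 --edge 6 --reversed
```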
## Analyzing benchmarks To analyze the simulations, a script called `benchmark_analysis.py` is used as follows: ```bash python benchmark_analysis.py --target [protein-name] ``` For example, for tyk2 results: ```bash python benchmark_analysis.py --target tyk2 ``` This will generate an output CSV file for [`arsenic`](https://github.com/openforcefield/arsenic) and corresponding absolute and relative free energy plots as PDF files, produced according to best practices. For more information on how to use the CLI analysis tool, run `python benchmark_analysis.py -h`. ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/benchmark_analysis.py ================================================ """ Script to perform analysis of perses simulations executed using the run_benchmarks.py script. Intended to be used on systems from https://github.com/openforcefield/protein-ligand-benchmark """ import argparse import glob import itertools import re import warnings import numpy as np import urllib.request import yaml from openmmtools.constants import kB from perses.analysis.load_simulations import Simulation from openmm import unit from openff.arsenic import plotting, wrangle # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" # Helper functions def get_simdir_list(base_dir='.', is_reversed=False): """ Get list of directories to extract simulation data from. Parameters ---------- base_dir: str, optional, default='.' Base directory where to search for simulation results. Defaults to the current directory. is_reversed: bool, optional, default=False Whether to consider the reversed simulations or not. Meant for testing purposes. Returns ------- dir_list: list List of directory paths for simulation results. """ # Load all expected simulations from directories out_dirs = ['/'.join(filepath.split('/')[:-1]) for filepath in glob.glob(f'{base_dir}/out*/*complex.nc')] reg = re.compile(r'out_[0-9]+_[0-9]+_reversed') # regular expression to deal with reversed directories if is_reversed: # Choose only reversed directories out_dirs = list(filter(reg.search, out_dirs)) else: # Filter out reversed directories out_dirs = list(itertools.filterfalse(reg.search, out_dirs)) return out_dirs def get_simulations_data(simulation_dirs): """Generates a list of simulation data objects given the simulation directory paths.""" simulations = [] for out_dir in simulation_dirs: # Load complete or fully working simulations # TODO: Try getting better exceptions from openmmtools -- use non-generic exceptions try: simulation = Simulation(out_dir) simulations.append(simulation) except Exception: warnings.warn(f"Edge in {out_dir} could not be loaded. Check simulation output is complete.") return simulations def to_arsenic_csv(experimental_data: dict, simulation_data: list, out_csv: str = 'out_benchmark.csv'): """ Generates a csv file to be used with openff-arsenic. Energy units are in kcal/mol. .. warning:: To be deprecated once the arsenic object model is improved. Parameters ---------- experimental_data: dict Python nested dictionary with experimental data in micromolar or nanomolar units.
Example of entry: {'lig_ejm_31': {'measurement': {'comment': 'Table 4, entry 31', 'doi': '10.1016/j.ejmech.2013.03.070', 'error': -1, 'type': 'ki', 'unit': 'uM', 'value': 0.096}, 'name': 'lig_ejm_31', 'smiles': '[H]c1c(c(c(c(c1[H])Cl)C(=O)N([H])c2c(c(nc(c2[H])N([H])C(=O)C([H])([H])[H])[H])[H])Cl)[H]'} simulation_data: list or iterable Python iterable object with perses Simulation objects as entries. out_csv: str Path to output csv file to be generated. """ # Ligand information ligands_names = list(experimental_data.keys()) lig_id_to_name = dict(enumerate(ligands_names)) kBT = kB * 300 * unit.kelvin # useful when converting to kcal/mol # Write csv file with open(out_csv, 'w') as csv_file: # Experimental block # print header for block csv_file.write("# Experimental block\n") csv_file.write("# Ligand, expt_DG, expt_dDG\n") # Extract ligand name, expt_DG and expt_dDG from ligands dictionary for ligand_name, ligand_data in experimental_data.items(): # TODO: Handle multiple measurement types unit_symbol = ligand_data['measurement']['unit'] measurement_value = ligand_data['measurement']['value'] measurement_error = ligand_data['measurement']['error'] # Unit conversion # TODO: Let's persuade PLBenchmarks to use pint units unit_conversions = { 'M' : 1.0, 'mM' : 1e-3, 'uM' : 1e-6, 'nM' : 1e-9, 'pM' : 1e-12, 'fM' : 1e-15 } if unit_symbol not in unit_conversions: raise ValueError(f'Unknown units "{unit_symbol}"') value_to_molar = unit_conversions[unit_symbol] # Handle unknown errors # TODO: We should be able to ensure that all entries have more reasonable errors. if measurement_error == -1: # TODO: For now, we use a relative_error from the Tyk2 system 10.1016/j.ejmech.2013.03.070 relative_error = 0.3 else: relative_error = measurement_error / measurement_value # Convert to free energies expt_DG = kBT.value_in_unit(unit.kilocalorie_per_mole) * np.log(measurement_value * value_to_molar) expt_dDG = kBT.value_in_unit(unit.kilocalorie_per_mole) * relative_error csv_file.write(f"{ligand_name}, {expt_DG}, {expt_dDG}\n") # Calculated block # print header for block csv_file.write("# Calculated block\n") csv_file.write("# Ligand1,Ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional)\n") # Loop through simulations, extract ligand1 and ligand2 indices, convert to names, create string with # ligand1, ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional) # write string in csv file for simulation in simulation_data: out_dir = simulation.directory.split('/')[-1] # getting integer indices ligand1_id, ligand2_id = int(out_dir.split('_')[-1]), int(out_dir.split('_')[-2]) # CHECK ORDER!
# getting names of ligands ligand1, ligand2 = lig_id_to_name[ligand1_id], lig_id_to_name[ligand2_id] # getting calc_DDG in kcal/mol calc_DDG = simulation.bindingdg.value_in_unit(unit.kilocalorie_per_mole) # getting calc_dDDG in kcal/mol calc_dDDG = simulation.bindingddg.value_in_unit(unit.kilocalorie_per_mole) csv_file.write( f"{ligand1}, {ligand2}, {calc_DDG}, {calc_dDDG}, 0.0\n") # hardcoding additional error as 0.0 # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with urllib.request.urlopen(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks analysis.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Analyze reversed edge simulations. Helpful for testing/consistency checks." ) args = arg_parser.parse_args() target = args.target # Download experimental data # TODO: This part should be done using plbenchmarks API - once there is a conda pkg # TODO: Let's cache this data when we set up the initial simulations in case it changes in between setting up and running the calculations and analysis. # TODO: Let's also be sure to use a specific release tag rather than 'master' target_dir = targets_dict[target]['dir'] ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with urllib.request.urlopen(ligands_url) as response: yaml_contents = response.read() print(yaml_contents) ligands_dict = yaml.safe_load(yaml_contents) # DEBUG print('') print(yaml.dump(ligands_dict)) # Get paths for simulation output directories out_dirs = get_simdir_list(is_reversed=args.reversed) # Generate list with simulation objects simulations = get_simulations_data(out_dirs) # Generate csv file csv_path = f'./{target}_arsenic.csv' to_arsenic_csv(ligands_dict, simulations, out_csv=csv_path) # TODO: Separate plotting into a different file # Make plots and store fe = wrangle.FEMap(csv_path) # Relative plot plotting.plot_DDGs(fe.graph, target_name=f'{target}', title=f'Relative binding energies - {target}', figsize=5, filename='./plot_relative.pdf' ) # Absolute plot, with experimental data shifted to correct mean experimental_mean_dg = np.asarray([node[1]["exp_DG"] for node in fe.graph.nodes(data=True)]).mean() plotting.plot_DGs(fe.graph, target_name=f'{target}', title=f'Absolute binding energies - {target}', figsize=5, filename='./plot_absolute.pdf', shift=experimental_mean_dg, ) ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/run_benchmarks.py ================================================ #!/usr/bin/env python """ CLI utility to automatically run benchmarks using data from the open force field protein-ligand benchmark at https://github.com/openforcefield/protein-ligand-benchmark It requires an internet connection to function properly, since it fetches data from the repository mentioned above. """ # TODO: Use plbenchmarks when conda package is available.
import argparse import logging import os import yaml from perses.app.setup_relative_calculation import run from perses.utils.url_utils import retrieve_file_url from perses.utils.url_utils import fetch_url_contents # Setting logging level config LOGLEVEL = os.environ.get("LOGLEVEL", "DEBUG").upper() logging.basicConfig( format='%(asctime)s %(levelname)-8s %(message)s', level=LOGLEVEL, datefmt='%Y-%m-%d %H:%M:%S') _logger = logging.getLogger() _logger.setLevel(LOGLEVEL) # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" def concatenate_files(input_files, output_file): """ Concatenate files given in input_files iterator into output_file. """ with open(output_file, 'w') as outfile: for filename in input_files: with open(filename) as infile: for line in infile: outfile.write(line) def run_relative_perturbation(lig_a_idx, lig_b_idx, reverse=False, tidy=True): """ Perform relative free energy simulation using perses CLI. Parameters ---------- lig_a_idx : int Index for first ligand (ligand A) lig_b_idx : int Index for second ligand (ligand B) reverse: bool Run the edge in reverse direction. Swaps the ligands. tidy : bool, optional remove auto-generated yaml files. Expects the target/protein pdb file in the same directory to be called 'target.pdb', and ligands file to be called 'ligands.sdf'. """ _logger.info(f'Starting relative calculation of ligand {lig_a_idx} to {lig_b_idx}') trajectory_directory = f'out_{lig_a_idx}_{lig_b_idx}' new_yaml = f'relative_{lig_a_idx}_{lig_b_idx}.yaml' # read base template yaml file # TODO: template.yaml file is configured for Tyk2, check if the same options work for others. with open(f'template.yaml', "r") as yaml_file: options = yaml.load(yaml_file, Loader=yaml.FullLoader) # TODO: add a step to perform some minimization - should help with NaNs # generate yaml file from template options['protein_pdb'] = 'target.pdb' options['ligand_file'] = 'ligands.sdf' if reverse: # Do the other direction of ligands options['old_ligand_index'] = lig_b_idx options['new_ligand_index'] = lig_a_idx # mark the output directory with reversed trajectory_directory = f'{trajectory_directory}_reversed' # mark new yaml file with reversed temp_path = new_yaml.split('.') new_yaml = f'{temp_path[0]}_reversed.{temp_path[1]}' else: options['old_ligand_index'] = lig_a_idx options['new_ligand_index'] = lig_b_idx options['trajectory_directory'] = f'{trajectory_directory}' with open(new_yaml, 'w') as outfile: yaml.dump(options, outfile) # run the simulation - using API point to respect logging level run(new_yaml) _logger.info(f'Relative calculation of ligand {lig_a_idx} to {lig_b_idx} complete') if tidy: os.remove(new_yaml) # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with fetch_url_contents(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--edge", type=int, help="Edge index (0-based) according to edges yaml file in dataset. Ex. 
--edge 5 (for sixth edge)", required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Whether to run the edge in reverse direction. Helpful for consistency checks." ) args = arg_parser.parse_args() target = args.target is_reversed = args.reversed # Fetch protein pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg target_dir = targets_dict[target]['dir'] pdb_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/protein.pdb" pdb_file = retrieve_file_url(pdb_url) # Fetch cofactors crystalwater pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg cofactors_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/cofactors_crystalwater.pdb" cofactors_file = retrieve_file_url(cofactors_url) # Concatenate protein with cofactors pdbs concatenate_files((pdb_file, cofactors_file), 'target.pdb') # Fetch ligands sdf files and concatenate them into one # TODO: This part should be done using plbenchmarks API - once there is a conda pkg ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with fetch_url_contents(ligands_url) as response: ligands_dict = yaml.safe_load(response.read()) ligand_files = [] for ligand in ligands_dict.keys(): ligand_url = f"{base_repo_url}/raw/master/data/{target_dir}/02_ligands/{ligand}/crd/{ligand}.sdf" ligand_file = retrieve_file_url(ligand_url) ligand_files.append(ligand_file) # concatenate sdfs concatenate_files(ligand_files, 'ligands.sdf') # run simulation # fetch edges information # TODO: This part should be done using plbenchmarks API - once there is a conda pkg edges_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/edges.yml" with fetch_url_contents(edges_url) as response: edges_dict = yaml.safe_load(response.read()) edges_list = list(edges_dict.values()) # subscriptable edges object - note dicts are ordered for py>=3.7 # edge list to access by index edge_index = args.edge # read from cli arguments edge = edges_list[edge_index] ligand_a_name = edge['ligand_a'] ligand_b_name = edge['ligand_b'] # ligands list to get indices -- preserving same order as upstream yaml file ligands_list = list(ligands_dict.keys()) lig_a_index = ligands_list.index(ligand_a_name) lig_b_index = ligands_list.index(ligand_b_name) # Perform the simulation run_relative_perturbation(lig_a_index, lig_b_index, reverse=is_reversed) ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/template.yaml ================================================ # Path to protein file protein_pdb: null # Path to ligand SDF file ligand_file: null # Indices of old and new ligands within SDF file old_ligand_index: null new_ligand_index: null # # Force fields # # OpenMM ffxml force field files installed via the openmm-forcefields package # for biopolymers and solvents. # Note that small molecule force field files should NOT be included here. forcefield_files: - amber/ff14SB.xml # ff14SB protein force field - amber/tip3p_standard.xml # TIP3P and recommended monovalent ion parameters - amber/tip3p_HFE_multivalent.xml # for divalent ions - amber/phosaa10.xml # HANDLES THE TPO # Small molecule force field # Options include anything allowed by the openmmforcefields SystemGenerator # e.g. 
one of ['openff-2.0.0', 'gaff-2.11'] small_molecule_forcefield: openff-1.2.0 # # Simulation conditions # # Simulation setup options solvent_padding: 9.0 # angstroms # Use geometry-derived mapping use_given_geometries: true given_geometries_tolerance: 0.2 # angstroms # Atom mapping specification atom_expression: - IntType bond_expession: - DefaultBonds # Multi-state sampling scheme # One of ['repex', 'nonequilibrium', 'sams'] fe_type: repex # Checkpoint interval checkpoint_interval: 50 # number of iterations # Number of equilibration iterations n_equilibration_iterations: 0 # Number of iterations to run n_cycles: 5000 # Number of alchemical intermediate states to use n_states: 12 pressure: 1.0 # atmospheres temperature: 300.0 # kelvin timestep: 4.0 # femtoseconds # remove_constraints: false # Number of integration steps per iteration n_steps_per_move_application: 250 # Location for storing trajectories trajectory_directory: null # Prefix for trajectory files (project-specific name) trajectory_prefix: out # Atoms to store in NetCDF files (MDTraj selection syntax) atom_selection: not water # Calculation phases to run # Permitted phases: ['complex', 'solvent', 'vacuum'] phases: - complex - solvent ================================================ FILE: setup.cfg ================================================ # Helper file to handle all configs [coverage:run] # .coveragerc to control coverage.py and pytest-cov omit = # Omit the tests */tests/* # Omit generated versioneer espaloma/_version.py [yapf] # YAPF, in .style.yapf files this shows up as "[style]" header COLUMN_LIMIT = 119 INDENT_WIDTH = 4 USE_TABS = False [flake8] # Flake8, PyFlakes, etc max-line-length = 119 [versioneer] # Automatic version numbering scheme VCS = git style = pep440 versionfile_source = espaloma/_version.py versionfile_build = espaloma/_version.py tag_prefix = '' [aliases] test = pytest ================================================ FILE: setup.py ================================================ """ espaloma Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm """ import sys from setuptools import find_packages, setup import versioneer short_description = __doc__.split("\n") # from https://github.com/pytest-dev/pytest-runner#conditional-requirement needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv) pytest_runner = ['pytest-runner'] if needs_pytest else [] try: with open("README.md", "r") as handle: long_description = handle.read() except Exception: long_description = "\n".join(short_description[2:]) setup( # Self-descriptive entries which should always be present name='espaloma', author='Yuanqing Wang @ choderalab // MSKCC', author_email='wangyq@wangyq.net', description=short_description[0], long_description=long_description, long_description_content_type="text/markdown", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), license='MIT', # Which Python importable modules should be included when your package is installed # Handled automatically by setuptools.
Use 'exclude' to prevent some specific # subpackage(s) from being added, if needed packages=find_packages(), # Optional include package data to ship with your package # Customize MANIFEST.in if the general case does not suit your needs # Comment out this line to prevent the files from being packaged with your software include_package_data=True, # Allows `setup.py test` to work correctly with pytest setup_requires=[] + pytest_runner, # Additional entries you may want simply uncomment the lines you want and fill in the data # url='http://www.my_package.com', # Website # install_requires=[], # Required packages, pulls from pip if needed; do not use for Conda deployment # platforms=['Linux', # 'Mac OS-X', # 'Unix', # 'Windows'], # Valid platforms your code works on, adjust to your flavor # python_requires=">=3.5", # Python version restrictions # Manual control if final package is compressible or not, set False to prevent the .egg from being made # zip_safe=False, ) ================================================ FILE: versioneer.py ================================================ # Version: 0.29 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! * https://github.com/python-versioneer/python-versioneer * Brian Warner * License: Public Domain (Unlicense) * Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 * [![Latest Version][pypi-image]][pypi-url] * [![Build Status][travis-image]][travis-url] This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. ## Quick Install Versioneer provides two installation modes. The "classic" vendored mode installs a copy of versioneer into your repository. The experimental build-time dependency mode is intended to allow you to skip this step and simplify the process of upgrading. 
### Vendored mode * `pip install versioneer` to somewhere in your $PATH * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is available, so you can also use `conda install -c conda-forge versioneer` * add a `[tool.versioneer]` section to your `pyproject.toml` or a `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) * Note that you will need to add `tomli; python_version < "3.11"` to your build-time dependencies if you use `pyproject.toml` * run `versioneer install --vendor` in your source tree, commit the results * verify version information with `python setup.py version` ### Build-time dependency mode * `pip install versioneer` to somewhere in your $PATH * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is available, so you can also use `conda install -c conda-forge versioneer` * add a `[tool.versioneer]` section to your `pyproject.toml` or a `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) * add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) to the `requires` key of the `build-system` table in `pyproject.toml`: ```toml [build-system] requires = ["setuptools", "versioneer[toml]"] build-backend = "setuptools.build_meta" ``` * run `versioneer install --no-vendor` in your source tree, commit the results * verify version information with `python setup.py version` ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes). The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. 
`_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation See [INSTALL.md](./INSTALL.md) for detailed installation instructions. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the commit date in ISO 8601 format. This will be None if the date is not available. * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. 
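To make these flavors concrete, here is a minimal sketch of querying them from `setup.py` (the returned values are illustrative, matching the example above):

```python
# Ask the vendored versioneer for all version flavors
# (run from the project root, next to setup.py).
import versioneer

info = versioneer.get_versions()
# For a dirty checkout two revisions past the "0.11" tag, this might be:
# {'version': '0.11+2.g1076c97.dirty',
#  'full-revisionid': '1076c978a8d3cfc70f408fe5974aa6c092c949ac',
#  'dirty': True,
#  'error': None,
#  'date': None}
print(info['version'])
```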
For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See [details.md](details.md) in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Known Limitations Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github [issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects Versioneer has limited support for source trees in which `setup.py` is not in the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are two common reasons why `setup.py` might not be in the root: * Source trees which contain multiple subprojects, such as [Buildbot](https://github.com/buildbot/buildbot), which contains both "master" and "slave" subprojects, each with their own `setup.py`, `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs and implementation details which frequently cause `pip install .` from a subproject directory to fail to find a correct version string (so it usually defaults to `0+unknown`). `pip install --editable .` should work correctly. `setup.py install` might work too. Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. [Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in [PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve pip to let Versioneer work correctly. Versioneer-0.16 and earlier only looked for a `.git` directory next to the `setup.cfg`, so subprojects were completely unsupported with those releases. ### Editable installs with setuptools <= 18.5 `setup.py develop` and `pip install --editable .` allow you to install a project into a virtualenv once, then continue editing the source code (and test) without re-installing after every change. "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a convenient way to specify executable scripts that should be installed along with the python package. These both work as expected when using modern setuptools. When using setuptools-18.5 or earlier, however, certain operations will cause `pkg_resources.DistributionNotFound` errors when running the entrypoint script, which must be resolved by re-installing the package. This happens when the install happens with one version, then the egg_info data is regenerated while a different version is checked out.
Many setup.py commands cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. [Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg` and `pyproject.toml`, if necessary, to include any new configuration settings indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. * re-run `versioneer install --[no-]vendor` in your source tree, to replace `SRC/_version.py` * commit any changed files ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## Similar projects * [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time dependency * [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of versioneer * [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools plugin ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the "Unlicense", as described in https://unlicense.org/. [pypi-image]: https://img.shields.io/pypi/v/versioneer.svg [pypi-url]: https://pypi.python.org/pypi/versioneer/ [travis-image]: https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg [travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ # pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring # pylint:disable=missing-class-docstring,too-many-branches,too-many-statements # pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error # pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with # pylint:disable=attribute-defined-outside-init,too-many-arguments import configparser import errno import json import os import re import subprocess import sys from pathlib import Path from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union from typing import NoReturn import functools have_tomllib = True if sys.version_info >= (3, 11): import tomllib else: try: import tomli as tomllib except ImportError: have_tomllib = False class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str versionfile_source: str versionfile_build: Optional[str] parentdir_prefix: Optional[str] verbose: Optional[bool] def get_root() -> str: """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . 
""" root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") if not ( os.path.exists(setup_py) or os.path.exists(pyproject_toml) or os.path.exists(versioneer_py) ): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") if not ( os.path.exists(setup_py) or os.path.exists(pyproject_toml) or os.path.exists(versioneer_py) ): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND').") raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. my_path = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(my_path), versioneer_py)) except NameError: pass return root def get_config_from_root(root: str) -> VersioneerConfig: """Read the project setup.cfg file to determine Versioneer config.""" # This might raise OSError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . root_pth = Path(root) pyproject_toml = root_pth / "pyproject.toml" setup_cfg = root_pth / "setup.cfg" section: Union[Dict[str, Any], configparser.SectionProxy, None] = None if pyproject_toml.exists() and have_tomllib: try: with open(pyproject_toml, 'rb') as fobj: pp = tomllib.load(fobj) section = pp['tool']['versioneer'] except (tomllib.TOMLDecodeError, KeyError) as e: print(f"Failed to load config from {pyproject_toml}: {e}") print("Try to load it from setup.cfg") if not section: parser = configparser.ConfigParser() with open(setup_cfg) as cfg_file: parser.read_file(cfg_file) parser.get("versioneer", "VCS") # raise error if missing section = parser["versioneer"] # `cast`` really shouldn't be used, but its simplest for the # common VersioneerConfig users at the moment. 
class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


# these dictionaries contain VCS-specific tools
LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS: Dict[str, Dict[str, Callable]] = {}


def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(f: Callable) -> Callable:
        """Store f in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate


def run_command(
    commands: List[str],
    args: List[str],
    cwd: Optional[str] = None,
    verbose: bool = False,
    hide_stderr: bool = False,
    env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
    """Call the given command(s)."""
    assert isinstance(commands, list)
    process = None

    popen_kwargs: Dict[str, Any] = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs["startupinfo"] = startupinfo

    for command in commands:
        try:
            dispcmd = str([command] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=(subprocess.PIPE if hide_stderr
                                               else None), **popen_kwargs)
            break
        except OSError as e:
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode


LONG_VERSION_PY['git'] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by github's download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain.
# Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer

"""Git implementation of _version.py."""

import errno
import os
import re
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple
import functools


def get_keywords() -> Dict[str, str]:
    """Get the keywords needed to look up the version information."""
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str parentdir_prefix: str versionfile_source: str verbose: bool def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY: Dict[str, str] = {} HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command( commands: List[str], args: List[str], cwd: Optional[str] = None, verbose: bool = False, hide_stderr: bool = False, env: Optional[Dict[str, str]] = None, ) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW popen_kwargs["startupinfo"] = startupinfo for command in commands: try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break except OSError as e: if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) return None, process.returncode return stdout, process.returncode def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. 
It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). 
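    Note: as implemented below, None (rather than -1) is returned when no
    post-release segment is present, e.g. "1.2.post3" -> ("1.2", 3) and
    "1.2" -> ("1.2", None).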
""" vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%%d" %% (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
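    # Illustrative (made-up) values after git-archive has expanded the
    # export-subst keywords in _version.py:
    #     git_refnames = " (HEAD -> main, tag: v0.3.2)"
    #     git_full = "0123456789abcdef0123456789abcdef01234567"
    #     git_date = "2023-01-01 00:00:00 -0500"
    # In an unexpanded checkout the literal "$Format:...$" strings remain,
    # which git_versions_from_keywords() detects and rejects.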
keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. 
This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? 
pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py for export-subst keyword substitution. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] files = [versionfile_source] if ipy: files.append(ipy) if "VERSIONEER_PEP518" not in globals(): try: my_path = __file__ if my_path.endswith((".pyc", ".pyo")): my_path = os.path.splitext(my_path)[0] + ".py" versioneer_file = os.path.relpath(my_path) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: with open(".gitattributes", "r") as fobj: for line in fobj: if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True break except OSError: pass if not present: with open(".gitattributes", "a+") as fobj: fobj.write(f"{versionfile_source} export-subst\n") files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") SHORT_VERSION_PY = """ # This file was generated by 'versioneer.py' (0.29) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
import json version_json = ''' %s ''' # END VERSION_JSON def get_versions(): return json.loads(version_json) """ def versions_from_file(filename: str) -> Dict[str, Any]: """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: """Write the given version number to the given _version.py file.""" contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). """ vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 
0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" def get_versions(verbose: bool = False) -> Dict[str, Any]: """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. """ if "versioneer" in sys.modules: # see the discussion in cmdclass.py:get_cmdclass() del sys.modules["versioneer"] root = get_root() cfg = get_config_from_root(root) assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` assert cfg.versionfile_source is not None, \ "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) # extract version from first of: _version.py, VCS command (e.g. 'git # describe'), parentdir. This is meant to work for developers using a # source checkout, for users of a tarball created by 'setup.py sdist', # and for users of a tarball/zipball created by 'git archive' or github's # download-from-tag feature or the equivalent in other VCSes. 
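    # The four fall-through attempts below, in order:
    #   1. expanded git-archive keywords baked into _version.py
    #   2. a static _version.py written earlier (e.g. by 'setup.py sdist')
    #   3. asking the VCS directly ('git describe' via pieces_from_vcs)
    #   4. the parent directory name, matched against parentdir_prefix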
    get_keywords_f = handlers.get("get_keywords")
    from_keywords_f = handlers.get("keywords")
    if get_keywords_f and from_keywords_f:
        try:
            keywords = get_keywords_f(versionfile_abs)
            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
            if verbose:
                print("got version from expanded keyword %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        ver = versions_from_file(versionfile_abs)
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, ver))
        return ver
    except NotThisMethod:
        pass

    from_vcs_f = handlers.get("pieces_from_vcs")
    if from_vcs_f:
        try:
            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
            ver = render(pieces, cfg.style)
            if verbose:
                print("got version from VCS %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        if cfg.parentdir_prefix:
            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
            if verbose:
                print("got version from parentdir %s" % ver)
            return ver
    except NotThisMethod:
        pass

    if verbose:
        print("unable to compute version")

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None, "error": "unable to compute version",
            "date": None}


def get_version() -> str:
    """Get the short version string for this project."""
    return get_versions()["version"]


def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None):
    """Get the custom setuptools subclasses used by Versioneer.

    If the package uses a different cmdclass (e.g. one from numpy), it
    should be provided as an argument.
    """
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to its pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/python-versioneer/python-versioneer/issues/52

    cmds = {} if cmdclass is None else cmdclass.copy()

    # we add "version" to setuptools
    from setuptools import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options: List[Tuple[str, str, str]] = []
        boolean_options: List[str] = []

        def initialize_options(self) -> None:
            pass

        def finalize_options(self) -> None:
            pass

        def run(self) -> None:
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            if vers["error"]:
                print(" error: %s" % vers["error"])
    cmds["version"] = cmd_version

    # we override "build_py" in setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?
# pip install -e . and setuptool/editable_wheel will invoke build_py # but the build_py command is not expected to copy any files. # we override different "build_py" commands for both environments if 'build_py' in cmds: _build_py: Any = cmds['build_py'] else: from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) if getattr(self, "editable_mode", False): # During editable installs `.py` and data files are # not copied to build_lib return # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if 'build_ext' in cmds: _build_ext: Any = cmds['build_ext'] else: from setuptools.command.build_ext import build_ext as _build_ext class cmd_build_ext(_build_ext): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_ext.run(self) if self.inplace: # build_ext --inplace will only build extensions in # build/lib<..> dir with no _version.py to write to. # As in place builds will already have a _version.py # in the module dir, we do not need to write one. return # now locate _version.py in the new build/ directory and replace # it with an updated value if not cfg.versionfile_build: return target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) if not os.path.exists(target_versionfile): print(f"Warning: {target_versionfile} does not exist, skipping " "version update. This can happen if you are running build_ext " "without first running build_py.") return print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_ext"] = cmd_build_ext if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe # type: ignore # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION # "product_version": versioneer.get_version(), # ... class cmd_build_exe(_build_exe): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] if 'py2exe' in sys.modules: # py2exe enabled? 
try: from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore except ImportError: from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore class cmd_py2exe(_py2exe): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _py2exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["py2exe"] = cmd_py2exe # sdist farms its file list building out to egg_info if 'egg_info' in cmds: _egg_info: Any = cmds['egg_info'] else: from setuptools.command.egg_info import egg_info as _egg_info class cmd_egg_info(_egg_info): def find_sources(self) -> None: # egg_info.find_sources builds the manifest list and writes it # in one shot super().find_sources() # Modify the filelist and normalize it root = get_root() cfg = get_config_from_root(root) self.filelist.append('versioneer.py') if cfg.versionfile_source: # There are rare cases where versionfile_source might not be # included by default, so we must be explicit self.filelist.append(cfg.versionfile_source) self.filelist.sort() self.filelist.remove_duplicates() # The write method is hidden in the manifest_maker instance that # generated the filelist and was thrown away # We will instead replicate their final normalization (to unicode, # and POSIX-style paths) from setuptools import unicode_utils normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') for f in self.filelist.files] manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') with open(manifest_filename, 'w') as fobj: fobj.write('\n'.join(normalized)) cmds['egg_info'] = cmd_egg_info # we override different "sdist" commands for both environments if 'sdist' in cmds: _sdist: Any = cmds['sdist'] else: from setuptools.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self) -> None: versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir: str, files: List[str]) -> None: root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. You need a section like: [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = parentdir_prefix = myproject- You will also need to edit your setup.py to use the results: import versioneer setup(version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ...) Please read the docstring in ./versioneer.py for configuration instructions, edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 
""" SAMPLE_CONFIG = """ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. [versioneer] #VCS = git #style = pep440 #versionfile_source = #versionfile_build = #tag_prefix = #parentdir_prefix = """ OLD_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ INIT_PY_SNIPPET = """ from . import {0} __version__ = {0}.get_versions()['version'] """ def do_setup() -> int: """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (OSError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) return 1 print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") maybe_ipy: Optional[str] = ipy if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() except OSError: old = "" module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] snippet = INIT_PY_SNIPPET.format(module) if OLD_SNIPPET in old: print(" replacing boilerplate in %s" % ipy) with open(ipy, "w") as f: f.write(old.replace(OLD_SNIPPET, snippet)) elif snippet not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: f.write(snippet) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) maybe_ipy = None # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. do_vcs_install(cfg.versionfile_source, maybe_ipy) return 0 def scan_setup_py() -> int: """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False errors = 0 with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line: found.add("cmdclass") if "versioneer.get_version()" in line: found.add("get_version") if "versioneer.VCS" in line: setters = True if "versioneer.versionfile_source" in line: setters = True if len(found) != 3: print("") print("Your setup.py appears to be missing some important items") print("(but I might be wrong). Please make sure it has something") print("roughly like the following:") print("") print(" import versioneer") print(" setup( version=versioneer.get_version(),") print(" cmdclass=versioneer.get_cmdclass(), ...)") print("") errors += 1 if setters: print("You should remove lines like 'versioneer.VCS = ' and") print("'versioneer.versionfile_source = ' . This configuration") print("now lives in setup.cfg, and should be removed from setup.py") print("") errors += 1 return errors def setup_command() -> NoReturn: """Set up Versioneer and exit with appropriate error code.""" errors = do_setup() errors += scan_setup_py() sys.exit(1 if errors else 0) if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": setup_command()