Repository: choderalab/espaloma Branch: main Commit: 413eb5507403 Files: 297 Total size: 652.0 KB Directory structure: gitextract_8sr_6d4i/ ├── .codecov.yml ├── .gitattributes ├── .github/ │ └── workflows/ │ ├── CI.yaml │ ├── clean_cache.yaml │ └── docker.yaml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── devtools/ │ ├── README.md │ ├── conda-envs/ │ │ └── espaloma.yaml │ ├── conda-recipe/ │ │ ├── build.sh │ │ └── meta.yml │ ├── gh-actions/ │ │ └── initialize_conda.sh │ └── scripts/ │ └── create_conda_env.py ├── docker/ │ └── Dockerfile ├── docs/ │ ├── Makefile │ ├── README.md │ ├── _static/ │ │ └── README.md │ ├── _templates/ │ │ ├── README.md │ │ ├── custom-class-template.rst │ │ └── custom-module-template.rst │ ├── api.rst │ ├── autosummary/ │ │ ├── espaloma.data.collection.alkethoh.rst │ │ ├── espaloma.data.collection.esol.rst │ │ ├── espaloma.data.collection.md17_new.rst │ │ ├── espaloma.data.collection.md17_old.rst │ │ ├── espaloma.data.collection.qca.rst │ │ ├── espaloma.data.collection.rst │ │ ├── espaloma.data.collection.zinc.rst │ │ ├── espaloma.data.dataset.Dataset.rst │ │ ├── espaloma.data.dataset.GraphDataset.rst │ │ ├── espaloma.data.dataset.rst │ │ ├── espaloma.data.md.MoleculeVacuumSimulation.rst │ │ ├── espaloma.data.md.rst │ │ ├── espaloma.data.md.subtract_nonbonded_force.rst │ │ ├── espaloma.data.md.subtract_nonbonded_force_except_14.rst │ │ ├── espaloma.data.md17_utils.get_molecule.rst │ │ ├── espaloma.data.md17_utils.realize_molecule.rst │ │ ├── espaloma.data.md17_utils.rst │ │ ├── espaloma.data.md17_utils.sum_offsets.rst │ │ ├── espaloma.data.normalize.BaseNormalize.rst │ │ ├── espaloma.data.normalize.DatasetLogNormalNormalize.rst │ │ ├── espaloma.data.normalize.DatasetNormalNormalize.rst │ │ ├── espaloma.data.normalize.ESOL100LogNormalNormalize.rst │ │ ├── espaloma.data.normalize.ESOL100NormalNormalize.rst │ │ ├── espaloma.data.normalize.NotNormalize.rst │ │ ├── espaloma.data.normalize.PositiveNotNormalize.rst │ │ ├── espaloma.data.normalize.rst │ │ ├── espaloma.data.qcarchive_utils.MolWithTargets.rst │ │ ├── espaloma.data.qcarchive_utils.breakdown_along_time_axis.rst │ │ ├── espaloma.data.qcarchive_utils.fetch_td_record.rst │ │ ├── espaloma.data.qcarchive_utils.get_client.rst │ │ ├── espaloma.data.qcarchive_utils.get_collection.rst │ │ ├── espaloma.data.qcarchive_utils.get_energy_and_gradient.rst │ │ ├── espaloma.data.qcarchive_utils.get_graph.rst │ │ ├── espaloma.data.qcarchive_utils.h5_to_dataset.rst │ │ ├── espaloma.data.qcarchive_utils.make_batch_size_consistent.rst │ │ ├── espaloma.data.qcarchive_utils.rst │ │ ├── espaloma.data.qcarchive_utils.weight_by_snapshots.rst │ │ ├── espaloma.data.rst │ │ ├── espaloma.data.utils.batch.rst │ │ ├── espaloma.data.utils.collate_fn.rst │ │ ├── espaloma.data.utils.from_csv.rst │ │ ├── espaloma.data.utils.infer_mol_from_coordinates.rst │ │ ├── espaloma.data.utils.make_temp_directory.rst │ │ ├── espaloma.data.utils.normalize.rst │ │ ├── espaloma.data.utils.rst │ │ ├── espaloma.data.utils.split.rst │ │ ├── espaloma.data.utils.sum_offsets.rst │ │ ├── espaloma.graphs.deploy.load_forcefield.rst │ │ ├── espaloma.graphs.deploy.openmm_system_from_graph.rst │ │ ├── espaloma.graphs.deploy.rst │ │ ├── espaloma.graphs.graph.BaseGraph.rst │ │ ├── espaloma.graphs.graph.Graph.rst │ │ ├── espaloma.graphs.graph.rst │ │ ├── espaloma.graphs.legacy_force_field.LegacyForceField.rst │ │ ├── espaloma.graphs.legacy_force_field.rst │ │ ├── espaloma.graphs.rst │ │ ├── 
espaloma.graphs.utils.offmol_indices.angle_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.atom_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.bond_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.improper_torsion_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.proper_torsion_indices.rst │ │ ├── espaloma.graphs.utils.offmol_indices.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.duplicate_index_ordering.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.from_homogeneous_and_mol.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.relationship_indices_from_offmol.rst │ │ ├── espaloma.graphs.utils.read_heterogeneous_graph.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.fp_oe.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.fp_rdkit.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.from_oemol.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.from_openff_toolkit_mol.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.from_rdkit_mol.rst │ │ ├── espaloma.graphs.utils.read_homogeneous_graph.rst │ │ ├── espaloma.graphs.utils.rst │ │ ├── espaloma.mm.angle.angle_high.rst │ │ ├── espaloma.mm.angle.bond_angle.rst │ │ ├── espaloma.mm.angle.bond_bond.rst │ │ ├── espaloma.mm.angle.harmonic_angle.rst │ │ ├── espaloma.mm.angle.linear_mixture_angle.rst │ │ ├── espaloma.mm.angle.rst │ │ ├── espaloma.mm.angle.urey_bradley.rst │ │ ├── espaloma.mm.bond.bond_high.rst │ │ ├── espaloma.mm.bond.gaussian_bond.rst │ │ ├── espaloma.mm.bond.harmonic_bond.rst │ │ ├── espaloma.mm.bond.linear_mixture_bond.rst │ │ ├── espaloma.mm.bond.rst │ │ ├── espaloma.mm.energy.CarryII.rst │ │ ├── espaloma.mm.energy.EnergyInGraph.rst │ │ ├── espaloma.mm.energy.EnergyInGraphII.rst │ │ ├── espaloma.mm.energy.apply_angle.rst │ │ ├── espaloma.mm.energy.apply_angle_ii.rst │ │ ├── espaloma.mm.energy.apply_angle_linear_mixture.rst │ │ ├── espaloma.mm.energy.apply_bond.rst │ │ ├── espaloma.mm.energy.apply_bond_gaussian.rst │ │ ├── espaloma.mm.energy.apply_bond_ii.rst │ │ ├── espaloma.mm.energy.apply_bond_linear_mixture.rst │ │ ├── espaloma.mm.energy.apply_improper_torsion.rst │ │ ├── espaloma.mm.energy.apply_nonbonded.rst │ │ ├── espaloma.mm.energy.apply_torsion.rst │ │ ├── espaloma.mm.energy.apply_torsion_ii.rst │ │ ├── espaloma.mm.energy.energy_in_graph.rst │ │ ├── espaloma.mm.energy.energy_in_graph_ii.rst │ │ ├── espaloma.mm.energy.rst │ │ ├── espaloma.mm.functional.gaussian.rst │ │ ├── espaloma.mm.functional.harmonic.rst │ │ ├── espaloma.mm.functional.harmonic_harmonic_coupled.rst │ │ ├── espaloma.mm.functional.harmonic_harmonic_periodic_coupled.rst │ │ ├── espaloma.mm.functional.harmonic_periodic_coupled.rst │ │ ├── espaloma.mm.functional.linear_mixture.rst │ │ ├── espaloma.mm.functional.linear_mixture_to_original.rst │ │ ├── espaloma.mm.functional.lj.rst │ │ ├── espaloma.mm.functional.periodic.rst │ │ ├── espaloma.mm.functional.periodic_fixed_phases.rst │ │ ├── espaloma.mm.functional.rst │ │ ├── espaloma.mm.geometry.GeometryInGraph.rst │ │ ├── espaloma.mm.geometry.angle.rst │ │ ├── espaloma.mm.geometry.apply_angle.rst │ │ ├── espaloma.mm.geometry.apply_bond.rst │ │ ├── espaloma.mm.geometry.apply_torsion.rst │ │ ├── espaloma.mm.geometry.copy_src.rst │ │ ├── espaloma.mm.geometry.dihedral.rst │ │ ├── espaloma.mm.geometry.distance.rst │ │ ├── espaloma.mm.geometry.geometry_in_graph.rst │ │ ├── espaloma.mm.geometry.reduce_stack.rst │ │ ├── espaloma.mm.geometry.rst │ │ ├── espaloma.mm.nonbonded.arithmetic_mean.rst │ │ ├── 
espaloma.mm.nonbonded.geometric_mean.rst │ │ ├── espaloma.mm.nonbonded.lj_12_6.rst │ │ ├── espaloma.mm.nonbonded.lj_9_6.rst │ │ ├── espaloma.mm.nonbonded.lorentz_berthelot.rst │ │ ├── espaloma.mm.nonbonded.rst │ │ ├── espaloma.mm.rst │ │ ├── espaloma.mm.torsion.angle_angle.rst │ │ ├── espaloma.mm.torsion.angle_angle_torsion.rst │ │ ├── espaloma.mm.torsion.angle_torsion.rst │ │ ├── espaloma.mm.torsion.bond_torsion.rst │ │ ├── espaloma.mm.torsion.periodic_torsion.rst │ │ ├── espaloma.mm.torsion.rst │ │ ├── espaloma.nn.baselines.FreeParameterBaseline.rst │ │ ├── espaloma.nn.baselines.FreeParameterBaselineInitMean.rst │ │ ├── espaloma.nn.baselines.rst │ │ ├── espaloma.nn.layers.dgl_legacy.GN.rst │ │ ├── espaloma.nn.layers.dgl_legacy.rst │ │ ├── espaloma.nn.layers.rst │ │ ├── espaloma.nn.readout.base_readout.BaseReadout.rst │ │ ├── espaloma.nn.readout.base_readout.rst │ │ ├── espaloma.nn.readout.charge_equilibrium.ChargeEquilibrium.rst │ │ ├── espaloma.nn.readout.charge_equilibrium.get_charges.rst │ │ ├── espaloma.nn.readout.charge_equilibrium.rst │ │ ├── espaloma.nn.readout.graph_level_readout.GraphLevelReadout.rst │ │ ├── espaloma.nn.readout.graph_level_readout.rst │ │ ├── espaloma.nn.readout.janossy.ExpCoefficients.rst │ │ ├── espaloma.nn.readout.janossy.JanossyPooling.rst │ │ ├── espaloma.nn.readout.janossy.JanossyPoolingImproper.rst │ │ ├── espaloma.nn.readout.janossy.JanossyPoolingNonbonded.rst │ │ ├── espaloma.nn.readout.janossy.LinearMixtureToOriginal.rst │ │ ├── espaloma.nn.readout.janossy.rst │ │ ├── espaloma.nn.readout.node_typing.NodeTyping.rst │ │ ├── espaloma.nn.readout.node_typing.rst │ │ ├── espaloma.nn.readout.rst │ │ ├── espaloma.nn.rst │ │ ├── espaloma.nn.sequential.Sequential.rst │ │ └── espaloma.nn.sequential.rst │ ├── conf.py │ ├── deploy.rst │ ├── download_experiments.sh │ ├── experiments/ │ │ ├── index.rst │ │ ├── mm_fitting_small.rst │ │ ├── qm_fitting.rst │ │ └── typing.rst │ ├── index.rst │ ├── install.rst │ ├── make.bat │ └── qm_fitting.rst ├── espaloma/ │ ├── .py │ ├── __init__.py │ ├── _version.py │ ├── app/ │ │ ├── __init__.py │ │ ├── experiment.py │ │ ├── report.py │ │ ├── tests/ │ │ │ └── test_experiment.py │ │ ├── train.py │ │ ├── train_all_params.py │ │ ├── train_bonded_energy.py │ │ └── train_multi_typing.py │ ├── data/ │ │ ├── __init__.py │ │ ├── collection.py │ │ ├── dataset.py │ │ ├── md.py │ │ ├── md17_utils.py │ │ ├── normalize.py │ │ ├── off-mol_0_10_6.json │ │ ├── qcarchive_utils.py │ │ ├── tests/ │ │ │ ├── test_collection.py │ │ │ ├── test_dataset.py │ │ │ ├── test_md.py │ │ │ ├── test_normalize.py │ │ │ ├── test_qcarchive.py │ │ │ └── test_save_and_load.py │ │ └── utils.py │ ├── graphs/ │ │ ├── __init__.py │ │ ├── deploy.py │ │ ├── graph.py │ │ ├── legacy_force_field.py │ │ ├── tests/ │ │ │ ├── test_deploy.py │ │ │ ├── test_gaff_parametrize.py │ │ │ ├── test_graph.py │ │ │ └── test_smirnoff.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── offmol_indices.py │ │ ├── read_heterogeneous_graph.py │ │ ├── read_homogeneous_graph.py │ │ └── regenerate_impropers.py │ ├── metrics.py │ ├── mm/ │ │ ├── __init__.py │ │ ├── angle.py │ │ ├── bond.py │ │ ├── energy.py │ │ ├── functional.py │ │ ├── geometry.py │ │ ├── nonbonded.py │ │ ├── tests/ │ │ │ ├── system.xml │ │ │ ├── test_angle.py │ │ │ ├── test_angle_energy.py │ │ │ ├── test_bond_energy.py │ │ │ ├── test_charge_energy_consistency.py │ │ │ ├── test_charge_energy_consistency_hardcode.py │ │ │ ├── test_dihedral.py │ │ │ ├── test_distance.py │ │ │ ├── test_energy.py │ │ │ ├── test_energy_gaussian.py │ │ │ ├── 
test_energy_ii.py │ │ │ ├── test_geometry.py │ │ │ ├── test_linear_combination.py │ │ │ ├── test_openmm_consistency.py │ │ │ └── test_recoverability.py │ │ └── torsion.py │ ├── nn/ │ │ ├── __init__.py │ │ ├── baselines.py │ │ ├── layers/ │ │ │ ├── __init__.py │ │ │ └── dgl_legacy.py │ │ ├── readout/ │ │ │ ├── __init__.py │ │ │ ├── base_readout.py │ │ │ ├── charge_equilibrium.py │ │ │ ├── graph_level_readout.py │ │ │ ├── janossy.py │ │ │ └── node_typing.py │ │ ├── sequential.py │ │ └── tests/ │ │ ├── test_baseline.py │ │ ├── test_janossy.py │ │ └── test_simple_net.py │ ├── units.py │ └── utils/ │ ├── geometry.py │ ├── model_fetch.py │ └── tests/ │ └── test_model_fetch.py ├── requirements.txt ├── scripts/ │ ├── README.md │ └── perses-benchmark/ │ ├── README.md │ ├── espaloma-perses.export.yaml │ ├── espaloma-perses.yaml │ └── tyk2/ │ ├── README.md │ ├── espaloma-0.2.2/ │ │ ├── LSF-job-template.sh │ │ ├── README.md │ │ ├── benchmark_analysis.py │ │ ├── run_benchmarks.py │ │ └── template.yaml │ └── openff-1.2.0/ │ ├── LSF-job-template.sh │ ├── README.md │ ├── benchmark_analysis.py │ ├── run_benchmarks.py │ └── template.yaml ├── setup.cfg ├── setup.py └── versioneer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .codecov.yml ================================================ # Codecov configuration to make it a bit less noisy coverage: status: patch: false project: default: threshold: 50% comment: layout: "header" require_changes: false branches: null behavior: default flags: null paths: null ================================================ FILE: .gitattributes ================================================ *.ipynb linguist-documentation *.html linguist-documentation espaloma/_version.py export-subst ================================================ FILE: .github/workflows/CI.yaml ================================================ name: CI on: pull_request: branches: - main push: branches: - main schedule: # Nightly tests run on master by default: # Scheduled workflows run on the latest commit on the default or base branch. # (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule) - cron: "0 0 * * *" concurrency: group: "${{ github.workflow }}-${{ github.ref }}" cancel-in-progress: true defaults: run: shell: bash -leo pipefail {0} jobs: test: name: ${{ matrix.os }}, Python ${{ matrix.python-version }} runs-on: ${{ matrix.os }}-latest strategy: fail-fast: false matrix: os: ['ubuntu','macos'] python-version: - "3.12" - "3.11" - "3.10" env: OPENMM: ${{ matrix.cfg.openmm }} OE_LICENSE: ${{ github.workspace }}/oe_license.txt steps: - uses: actions/checkout@v3 - name: Get current date id: date run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}" - uses: mamba-org/setup-micromamba@v1 with: environment-file: devtools/conda-envs/espaloma.yaml cache-environment: true cache-downloads: true cache-environment-key: environment-${{ steps.date.outputs.date }} cache-downloads-key: downloads-${{ steps.date.outputs.date }} create-args: >- python=${{ matrix.python-version }} - name: Additional info about the build shell: bash run: | uname -a df -h ulimit -a - name: Environment Information run: | micromamba info micromamba list micromamba --version - name: Install package run: | python -m pip install --no-deps -e . 
- name: Run tests run: | pytest -v --cov=espaloma --cov-report=xml --color=yes espaloma/ - name: CodeCov uses: codecov/codecov-action@v3 if: ${{ github.repository == 'choderalab/espaloma' && github.event_name == 'pull_request' }} with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml flags: unittests yml: ./.codecov.yml fail_ci_if_error: False verbose: True ================================================ FILE: .github/workflows/clean_cache.yaml ================================================ # from https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#force-deleting-cache-entries name: cleanup caches by a branch on: pull_request: types: - closed jobs: cleanup: runs-on: ubuntu-latest steps: - name: Check out code uses: actions/checkout@v3 - name: Cleanup run: | gh extension install actions/gh-actions-cache REPO=${{ github.repository }} BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge" echo "Fetching list of cache key" cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 ) ## Setting this to not fail the workflow while deleting cache keys. set +e echo "Deleting caches..." for cacheKey in $cacheKeysForPR do gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm done echo "Done" env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/docker.yaml ================================================ # This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. # GitHub recommends pinning actions to a commit SHA. # To get a newer version, you will need to update the SHA. # You can also reference a tag or branch, but the action may change without warning. 
name: Create and publish a Docker image on: workflow_dispatch: defaults: run: shell: bash -l {0} env: REGISTRY: ghcr.io IMAGE_NAME: choderalab/espaloma jobs: build-and-push-image: runs-on: ubuntu-latest permissions: contents: read packages: write steps: - name: Free disk space run: | sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true sudo rm -rf \ /usr/share/dotnet /usr/local/lib/android /opt/ghc \ /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ /usr/lib/jvm || true echo "some directories deleted" sudo apt install aptitude -y >/dev/null 2>&1 sudo aptitude purge aria2 ansible azure-cli shellcheck rpm xorriso zsync \ esl-erlang firefox gfortran-8 gfortran-9 google-chrome-stable \ google-cloud-sdk imagemagick \ libmagickcore-dev libmagickwand-dev libmagic-dev ant ant-optional kubectl \ mercurial apt-transport-https mono-complete libmysqlclient \ unixodbc-dev yarn chrpath libssl-dev libxft-dev \ libfreetype6 libfreetype6-dev libfontconfig1 libfontconfig1-dev \ snmp pollinate libpq-dev postgresql-client powershell ruby-full \ sphinxsearch subversion mongodb-org azure-cli microsoft-edge-stable \ -y -f >/dev/null 2>&1 sudo aptitude purge google-cloud-sdk -f -y >/dev/null 2>&1 sudo aptitude purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true sudo apt purge microsoft-edge-stable -f -y >/dev/null 2>&1 || true sudo aptitude purge '~n ^mysql' -f -y >/dev/null 2>&1 sudo aptitude purge '~n ^php' -f -y >/dev/null 2>&1 sudo aptitude purge '~n ^dotnet' -f -y >/dev/null 2>&1 sudo apt-get autoremove -y >/dev/null 2>&1 sudo apt-get autoclean -y >/dev/null 2>&1 echo "some packages purged" - name: Checkout repository uses: actions/checkout@v3 with: fetch-depth: 0 - name: Get Latest Version id: latest-version run: | LATEST_TAG=$(git describe --tags $(git rev-list --tags --max-count=1)) echo $LATEST_TAG echo "LATEST_TAG=$LATEST_TAG" >> $GITHUB_OUTPUT VERSION=$LATEST_TAG echo $VERSION echo "VERSION=$VERSION" >> $GITHUB_OUTPUT - name: Print Latest Version run: echo ${{ steps.latest-version.outputs.VERSION }} # Now that we got the version, we don't need the .git folder - name: Get more space run: | df . -h sudo rm -rf ${GITHUB_WORKSPACE}/.git df . -h - name: Create fully qualified image registry path id: fqirp run: | FQIRP=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }} echo "FQIRP=$FQIRP" >> $GITHUB_OUTPUT - name: Print FQIRP run: echo ${{ steps.fqirp.outputs.FQIRP }} - name: Log in to the Container registry uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | type=schedule,pattern=nightly,enable=true,priority=1000 type=ref,event=branch,enable=true,priority=600 type=ref,event=tag,enable=true,priority=600 type=ref,event=pr,prefix=pr-,enable=true,priority=600 type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{version}} type=sha ${{ steps.latest-version.outputs.VERSION }} - name: Build and export to Docker uses: docker/build-push-action@v4 with: context: . 
file: docker/Dockerfile load: true push: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} build-args: | VERSION=${{ steps.latest-version.outputs.VERSION }} - name: Test image run: | docker run --rm ${{ steps.fqirp.outputs.FQIRP }} python -c "import espaloma; print(espaloma.__version__)" docker run --rm ${{ steps.fqirp.outputs.FQIRP }} pytest --pyargs espaloma -v - name: Push Docker image uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: context: . file: docker/Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} build-args: | VERSION=${{ steps.latest-version.outputs.VERSION }} - name: Setup Apptainer uses: eWaterCycle/setup-apptainer@v2 with: apptainer-version: 1.1.2 - name: Build Apptainer Image run: singularity build espaloma_${{ steps.latest-version.outputs.VERSION }}.sif docker-daemon:${{ steps.fqirp.outputs.FQIRP }} - name: Test & Push Apptainer Image run: | mkdir test_apptainer cd test_apptainer singularity run ../espaloma_${{ steps.latest-version.outputs.VERSION }}.sif pytest --pyargs espaloma -v echo ${{ secrets.GITHUB_TOKEN }} | singularity remote login -u ${{ secrets.GHCR_USERNAME }} --password-stdin oras://ghcr.io singularity push ../espaloma_${{ steps.latest-version.outputs.VERSION }}.sif oras://${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }}-apptainer ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # data *.sdf *.csv # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Parm@Frosst download parm_at_Frosst.tgz # misc .DS_Store ================================================ FILE: .readthedocs.yaml ================================================ version: 2 build: os: "ubuntu-20.04" tools: python: "mambaforge-4.10" sphinx: configuration: docs/conf.py fail_on_warning: false conda: environment: devtools/conda-envs/espaloma.yaml python: # Install our python package before building the docs install: - method: pip path: . ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Yuanqing Wang @ choderalab // MSKCC Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: MANIFEST.in ================================================ include LICENSE include MANIFEST.in include versioneer.py graft espaloma global-exclude *.py[cod] __pycache__ *.so ================================================ FILE: README.md ================================================ espaloma: **E**xtensible **S**urrogate **P**otenti**al** **O**ptimized by **M**essage-passing **A**lgorithms 🍹 ============================== [//]: # (Badges) [![CI](https://github.com/choderalab/espaloma/actions/workflows/CI.yaml/badge.svg?branch=main)](https://github.com/choderalab/espaloma/actions/workflows/CI.yaml) [![Documentation Status](https://readthedocs.org/projects/espaloma/badge/?version=latest)](https://espaloma.readthedocs.io/en/latest/?badge=latest) Source code for [Wang Y, Fass J, and Chodera JD "End-to-End Differentiable Construction of Molecular Mechanics Force Fields."](https://arxiv.org/abs/2010.01196) ![abstract](docs/_static/espaloma_abstract_v2-2.png) # Documentation: https://docs.espaloma.org # Paper Abstract Molecular mechanics (MM) potentials have long been a workhorse of computational chemistry. Leveraging accuracy and speed, these functional forms find use in a wide variety of applications in biomolecular modeling and drug discovery, from rapid virtual screening to detailed free energy calculations. 
Traditionally, MM potentials have relied on human-curated, inflexible, and poorly extensible discrete chemical perception rules (_atom types_) for applying parameters to small molecules or biopolymers, making it difficult to optimize both types and parameters to fit quantum chemical or physical property data.
Here, we propose an alternative approach that uses _graph neural networks_ to perceive chemical environments, producing continuous atom embeddings from which valence and nonbonded parameters can be predicted using invariance-preserving layers.
Since all stages are built from smooth neural functions, the entire process---spanning chemical perception to parameter assignment---is modular and end-to-end differentiable with respect to model parameters, allowing new force fields to be easily constructed, extended, and applied to arbitrary molecules.
We show that this approach is not only sufficiently expressive to reproduce legacy atom types, but that it can learn and extend existing molecular mechanics force fields, construct entirely new force fields applicable to both biopolymers and small molecules from quantum chemical calculations, and even learn to accurately predict free energies from experimental observables.

# Installation

We recommend using [`mamba`](https://mamba.readthedocs.io/en/latest/mamba-installation.html#mamba-installation), which is a drop-in replacement for `conda` and is much faster.

```bash
$ mamba create --name espaloma -c conda-forge "espaloma=0.3.2"
```

# Example: Deploy espaloma 0.3.2 pretrained force field to arbitrary MM system

```python
# imports
import os
import torch
import espaloma as esp

# define or load a molecule of interest via the Open Force Field toolkit
from openff.toolkit.topology import Molecule
molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")

# create an Espaloma Graph object to represent the molecule of interest
molecule_graph = esp.Graph(molecule)

# load pretrained model
espaloma_model = esp.get_model("latest")

# apply a trained espaloma model to assign parameters
espaloma_model(molecule_graph.heterograph)

# create an OpenMM System for the specified molecule
openmm_system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph)
```

If using espaloma from a local `.pt` file (for example, `espaloma-0.3.2.pt`), you need to call the model's `eval` method to get correct inference/predictions:

```python
import torch
...
# load local pretrained model
espaloma_model = torch.load("espaloma-0.3.2.pt")
espaloma_model.eval()
...
```

The rest of the code is the same as in the previous example.

# Compatible models

Below is a compatibility matrix showing which versions of the `espaloma` code support which `espaloma` models (the `.pt` files).

| Model 🧪 | DOI 📝 | Supported Espaloma version 💻 | Release Date 🗓️ | Espaloma architecture change 📐? |
|---------------------|-------|------------------------------|----------------|----------------------------------|
| `espaloma-0.3.2.pt` |       | 0.3.1, 0.3.2, 0.4.0          | Sep 22, 2023   | ✅ No                             |
| `espaloma-0.3.1.pt` |       | 0.3.1, 0.3.2, 0.4.0          | Jul 17, 2023   | ⚠️ Yes                           |
| `espaloma-0.3.0.pt` |       | 0.3.0                        | Apr 26, 2023   | ⚠️ Yes                           |

> [!NOTE]
> `espaloma-0.3.1.pt` and `espaloma-0.3.2.pt` are the same model.

# Using espaloma to parameterize small molecules in relative free energy calculations

An example of using espaloma to parameterize small molecules in relative alchemical free energy calculations is provided in the `scripts/perses-benchmark/` directory.
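Before plugging an espaloma-parameterized system into a free energy workflow, it can be useful to sanity-check it by evaluating its potential energy directly. The snippet below is a minimal sketch (not part of the espaloma API) that assumes the deploy example above has already been run, so that `molecule` and `openmm_system` are in scope:

```python
# Minimal sketch: evaluate the potential energy of the espaloma-parameterized
# OpenMM System from the deploy example above. Assumes `molecule` and
# `openmm_system` are already defined as in that example.
import openmm
import openmm.unit as unit
from openff.units.openmm import to_openmm

# generate a single conformer so the molecule has coordinates
molecule.generate_conformers(n_conformers=1)

integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
context = openmm.Context(openmm_system, integrator)
context.setPositions(to_openmm(molecule.conformers[0]))

state = context.getState(getEnergy=True)
print(state.getPotentialEnergy())  # prints a unit-bearing energy, e.g. in kJ/mol
```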
# Manifest

* `espaloma/` core code for graph-parametrized potential energy functions.
    * `graphs/` data objects that contain the various levels of information we need.
        * `graph.py` base modules for graphs.
        * `molecule_graph.py` provides APIs to various molecular modelling toolkits.
        * `homogeneous_graph.py` simplest graph representation of a molecule.
        * `heterogeneous_graph.py` graph representation of a molecule that contains information regarding membership of lower-level nodes to higher-level nodes.
        * `parametrized_graph.py` graph representation of a molecule with all parameters needed for energy evaluation.
    * `nn/` neural network models that facilitate translation between graphs.
        * `dgl_legacy.py` API to DGL models for atom-level message passing.
    * `mm/` molecular mechanics functionalities for energy evaluation.
        * `i/` energy terms used in Class-I force fields.
            * `bond.py` bond energy
            * `angle.py` angle energy
            * `torsion.py` torsion energy
            * `nonbonded.py` nonbonded energy
        * `ii/` energy terms used in Class-II force fields.
            * `coupling.py` coupling terms
            * `polynomial.py` higher-order polynomials.

# License

This software is licensed under the [MIT license](https://opensource.org/licenses/MIT).

# Copyright

Copyright (c) 2020, Chodera Lab at Memorial Sloan Kettering Cancer Center and Authors:

Authors:
- [Yuanqing Wang](http://www.wangyq.net)
- Josh Fass
- John D. Chodera

================================================
FILE: devtools/README.md
================================================
# Development, testing, and deployment tools

This directory contains a collection of tools for running Continuous Integration (CI) tests, conda installation, and other development tools not directly related to the coding process.

## Manifest

### Continuous Integration

You should test your code, but do not feel compelled to use these specific programs. You also may not need Unix and Windows testing if you only plan to deploy on specific platforms. These are just to help you get started.

* `travis-ci`: Linux and OSX based testing through [Travis-CI](https://about.travis-ci.com/)
  * `before_install.sh`: Pip/Miniconda pre-package installation script for Travis
* `appveyor`: Windows based testing through [AppVeyor](https://www.appveyor.com/) (there are no files directly related to this)

### Conda Environment:

This directory contains the files to set up the Conda environment for testing purposes.

* `conda-envs`: directory containing the YAML file(s) which fully describe Conda environments, their dependencies, and their dependency provenances
  * `espaloma.yaml`: Test environment file with base and test dependencies; channels are specified in the file

### Additional Scripts:

This directory contains OS-agnostic helper scripts which don't fall into any of the previous categories.

* `scripts`
  * `create_conda_env.py`: Helper program for spinning up new conda environments based on a starter file, with Python version and environment name command-line options

## How to contribute changes

- Clone the repository if you have write access to the main repo; fork the repository if you are a collaborator without write access.
- Make a new branch with `git checkout -b {your branch name}`
- Make changes and test your code
- Ensure that the test environment dependencies (`conda-envs`) line up with the build and deploy dependencies (`conda-recipe/meta.yml`)
- Push the branch to the repo (either the main repo or your fork) with `git push -u origin {your branch name}`
  * Note that `origin` is the default name assigned to the remote; yours may be different
- Make a PR on GitHub with your changes
- We'll review the changes and get your code into the repo after lively discussion!

## Checklist for updates

- [ ] Make sure there is an/are issue(s) opened for your specific update
- [ ] Create the PR, referencing the issue
- [ ] Debug the PR as needed until tests pass
- [ ] Tag the final, debugged version
  * `git tag -a X.Y.Z [latest pushed commit] && git push --follow-tags`
- [ ] Get the PR merged in

## Versioneer Auto-version

[Versioneer](https://github.com/warner/python-versioneer) will automatically infer what version is installed by looking at the `git` tags and how many commits ahead this version is. The format follows [PEP 440](https://www.python.org/dev/peps/pep-0440/) and matches the regular expression:

```regexp
\d+\.\d+\.\d+(\+\d+-[a-z0-9]+)?
```

If the version of this commit is the same as a `git` tag, the installed version is the same as the tag, e.g. `espaloma-0.1.2`; otherwise it will be appended with `+X`, where `X` is the number of commits ahead of the last tag, and then `-YYYYYY`, where the `Y`'s are replaced with the `git` commit hash. A quick runtime check of this string is sketched after the `build.sh` listing below.

================================================
FILE: devtools/conda-envs/espaloma.yaml
================================================
name: espaloma-test
channels:
  - conda-forge
  - openeye
dependencies:
  # Base dependencies
  - python
  - pip
  # 3rd party
  - openeye-toolkits
  - numpy
  - matplotlib
  - scipy
  - openff-toolkit >=0.12
  - openff-forcefields
  - openff-units
  - smirnoff99frosst >=1.1.0.1 # https://github.com/openforcefield/smirnoff99Frosst/issues/109
  - openmm
  - openmmforcefields >=0.11.2
  - tqdm
  - pydantic <2 # We need our deps to fix this
  - qcportal >=0.50
  - dgl =2.3.0
  - torchdata <=0.10.0
  # Testing
  - pytest
  - pytest-cov
  - pytest-xdist
  - pytest-randomly
  - codecov
  - nose
  - nose-timer
  - coverage
  - sphinx
  - sphinx_rtd_theme

================================================
FILE: devtools/conda-recipe/build.sh
================================================
pip install .
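As referenced in the Versioneer section above, the inferred version string can be checked at runtime. A minimal sketch, assuming `espaloma` is installed in the current environment:

```python
# Print the Versioneer-inferred version string.
# On a tagged commit this looks like e.g. "0.1.2"; between tags it carries
# the "+N-g<hash>"-style local segment described in the Versioneer section.
import espaloma

print(espaloma.__version__)
```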
================================================
FILE: devtools/conda-recipe/meta.yml
================================================
package:
  name: espaloma
  version: !!str 0.0.0

source:
  path: ../../

build:
  preserve_egg_dir: True
  number: 0

requirements:
  build:
    - python
    - setuptools
    - numpy >=1.14
  run:
    - python
    - pip
    - openeye-toolkits
    - numpy
    - matplotlib
    - scipy
    - openff-toolkit
    - openff-forcefields
    - smirnoff99Frosst
    - openmm
    - openmmforcefields
    - pytorch
    - dgl
    - pytest
    - pytest-cov
    - codecov
    - nose
    - nose-timer
    - coverage
    - qcportal
    - torchdata <=0.10.0

about:
  home: https://github.com/choderalab/espaloma
  license: MIT
  license_file: LICENSE

================================================
FILE: devtools/gh-actions/initialize_conda.sh
================================================
case $CI_OS in
    windows*)
        eval "$(${CONDA}/condabin/conda.bat shell.bash hook)"
        ;;
    *)
        eval "$(${CONDA}/condabin/conda shell.bash hook)"
        ;;
esac

================================================
FILE: devtools/scripts/create_conda_env.py
================================================
import argparse
import glob
import os
import re
import shutil
import subprocess as sp
from contextlib import contextmanager
from tempfile import TemporaryDirectory

# YAML imports
try:
    import yaml  # PyYAML
    loader = yaml.load
except ImportError:
    try:
        import ruamel_yaml as yaml  # Ruamel YAML
    except ImportError:
        try:
            # Load Ruamel YAML from the base conda environment
            from importlib import util as import_util
            CONDA_BIN = os.path.dirname(os.environ['CONDA_EXE'])
            ruamel_yaml_path = glob.glob(os.path.join(CONDA_BIN, '..', 'lib', 'python*.*',
                                                      'site-packages', 'ruamel_yaml', '__init__.py'))[0]
            # Based on importlib example, but only needs to load_module since its the whole package, not just
            # a module
            spec = import_util.spec_from_file_location('ruamel_yaml', ruamel_yaml_path)
            yaml = spec.loader.load_module()
        except (KeyError, ImportError, IndexError):
            raise ImportError("No YAML parser could be found in this or the conda environment. "
                              "Could not find PyYAML or Ruamel YAML in the current environment, "
                              "AND could not find Ruamel YAML in the base conda environment through CONDA_EXE path. "
                              "Environment not created!")
    loader = yaml.YAML(typ="safe").load  # typ="safe" avoids odd typing on output


@contextmanager
def temp_cd():
    """Temporary CD Helper"""
    cwd = os.getcwd()
    with TemporaryDirectory() as td:
        try:
            os.chdir(td)
            yield
        finally:
            os.chdir(cwd)


# Args
parser = argparse.ArgumentParser(description='Creates a conda environment from file for a given Python version.')
parser.add_argument('-n', '--name', type=str, help='The name of the created Python environment')
parser.add_argument('-p', '--python', type=str, help='The version of the created Python environment')
parser.add_argument('conda_file', help='The file for the created Python environment')
args = parser.parse_args()

# Open the base file
with open(args.conda_file, "r") as handle:
    yaml_script = loader(handle.read())

python_replacement_string = "python {}*".format(args.python)

try:
    for dep_index, dep_value in enumerate(yaml_script['dependencies']):
        if re.match('python([ ><=*]+[0-9.*]*)?$', dep_value):  # Match explicitly 'python' and its formats
            yaml_script['dependencies'].pop(dep_index)
            break  # Making the assumption there is only one Python entry, also avoids need to enumerate in reverse
except (KeyError, TypeError):
    # Case of no dependencies key, or dependencies: None
    yaml_script['dependencies'] = []
finally:
    # Ensure the python version is added in. Even if the code does not need it, we assume the env does
    yaml_script['dependencies'].insert(0, python_replacement_string)

# Figure out conda path
if "CONDA_EXE" in os.environ:
    conda_path = os.environ["CONDA_EXE"]
else:
    conda_path = shutil.which("conda")
if conda_path is None:
    raise RuntimeError("Could not find a conda binary in CONDA_EXE variable or in executable search path")

print("CONDA ENV NAME  {}".format(args.name))
print("PYTHON VERSION  {}".format(args.python))
print("CONDA FILE NAME {}".format(args.conda_file))
print("CONDA PATH      {}".format(conda_path))

# Write to a temp directory which will always be cleaned up
with temp_cd():
    temp_file_name = "temp_script.yaml"
    with open(temp_file_name, 'w') as f:
        f.write(yaml.dump(yaml_script))
    sp.call("{} env create -n {} -f {}".format(conda_path, args.name, temp_file_name), shell=True)

================================================
FILE: docker/Dockerfile
================================================
FROM mambaorg/micromamba:1.4.9

LABEL org.opencontainers.image.source=https://github.com/choderalab/espaloma
LABEL org.opencontainers.image.description="Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm"
LABEL org.opencontainers.image.licenses=MIT

# Espaloma version we want to build
ARG VERSION

# Don't buffer stdout & stderr streams, so if there is a crash no partial buffer output is lost
# https://docs.python.org/3/using/cmdline.html#cmdoption-u
ENV PYTHONUNBUFFERED=1

RUN micromamba install -y -n base -c conda-forge -c dglteam pytest "dgl<1" git "espaloma==$VERSION" && \
    micromamba clean --all --yes

# Ensure that conda environment is automatically activated
# https://github.com/mamba-org/micromamba-docker#running-commands-in-dockerfile-within-the-conda-environment
ARG MAMBA_DOCKERFILE_ACTIVATE=1

================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = espaloma
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/README.md
================================================
# Compiling espaloma's Documentation

The docs for this project are built with [Sphinx](http://www.sphinx-doc.org/en/master/).
To compile the docs, first ensure that Sphinx and the ReadTheDocs theme are installed.

```bash
conda install sphinx sphinx_rtd_theme
```

Once installed, you can use the `Makefile` in this directory to compile static HTML pages:

```bash
make html
```

The compiled docs will be in the `_build` directory and can be viewed by opening `index.html` (which may itself be inside a directory called `html/`, depending on what version of Sphinx is installed).

================================================
FILE: docs/_static/README.md
================================================
# Static Doc Directory

Add any paths that contain custom static files (such as style sheets) here, relative to the `conf.py` file's directory.
They are copied after the builtin static files, so a file named "default.css" will overwrite the builtin "default.css".

The path to this folder is set in the Sphinx `conf.py` file in the line:

```python
html_static_path = ['_static']
```

## Examples of file to add to this directory

* Custom Cascading Style Sheets
* Custom JavaScript code
* Static logo images

================================================
FILE: docs/_templates/README.md
================================================
# Templates Doc Directory

Add any paths that contain templates here, relative to the `conf.py` file's directory.
They are copied after the builtin template files, so a file named "page.html" will overwrite the builtin "page.html".

The path to this folder is set in the Sphinx `conf.py` file in the line:

```python
templates_path = ['_templates']
```

## Examples of file to add to this directory

* HTML extensions of stock pages like `page.html` or `layout.html`

================================================
FILE: docs/_templates/custom-class-template.rst
================================================
{{ fullname | escape | underline}}

.. currentmodule:: {{ module }}

.. autoclass:: {{ objname }}
   :members:
   :show-inheritance:
   :inherited-members:

   {% block methods %}
   .. automethod:: __init__

   {% if methods %}
   .. rubric:: {{ _('Methods') }}

   .. autosummary::
   {% for item in methods %}
      ~{{ name }}.{{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block attributes %}
   {% if attributes %}
   .. rubric:: {{ _('Attributes') }}

   .. autosummary::
   {% for item in attributes %}
      ~{{ name }}.{{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

================================================
FILE: docs/_templates/custom-module-template.rst
================================================
{{ fullname | escape | underline}}

.. automodule:: {{ fullname }}

   {% block attributes %}
   {% if attributes %}
   .. rubric:: Module Attributes

   .. autosummary::
      :toctree:
   {% for item in attributes %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block functions %}
   {% if functions %}
   .. rubric:: {{ _('Functions') }}

   .. autosummary::
      :toctree:
   {% for item in functions %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block classes %}
   {% if classes %}
   .. rubric:: {{ _('Classes') }}

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst
   {% for item in classes %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

   {% block exceptions %}
   {% if exceptions %}
   .. rubric:: {{ _('Exceptions') }}

   .. autosummary::
      :toctree:
   {% for item in exceptions %}
      {{ item }}
   {%- endfor %}
   {% endif %}
   {% endblock %}

{% block modules %}
{% if modules %}
.. rubric:: Modules

.. autosummary::
   :toctree:
   :template: custom-module-template.rst
   :recursive:
{% for item in modules %}
   {{ item }}
{%- endfor %}
{% endif %}
{% endblock %}

================================================
FILE: docs/api.rst
================================================
API Documentation
=================

.. autosummary::
   :toctree: autosummary
   :template: custom-module-template.rst
   :recursive:

   espaloma.mm
   espaloma.nn
   espaloma.graphs
   espaloma.data

================================================
FILE: docs/autosummary/espaloma.data.collection.alkethoh.rst
================================================
espaloma.data.collection.alkethoh
=================================

.. currentmodule:: espaloma.data.collection

..
autofunction:: alkethoh ================================================ FILE: docs/autosummary/espaloma.data.collection.esol.rst ================================================ espaloma.data.collection.esol ============================= .. currentmodule:: espaloma.data.collection .. autofunction:: esol ================================================ FILE: docs/autosummary/espaloma.data.collection.md17_new.rst ================================================ espaloma.data.collection.md17\_new ================================== .. currentmodule:: espaloma.data.collection .. autofunction:: md17_new ================================================ FILE: docs/autosummary/espaloma.data.collection.md17_old.rst ================================================ espaloma.data.collection.md17\_old ================================== .. currentmodule:: espaloma.data.collection .. autofunction:: md17_old ================================================ FILE: docs/autosummary/espaloma.data.collection.qca.rst ================================================ espaloma.data.collection.qca ============================ .. currentmodule:: espaloma.data.collection .. autoclass:: qca :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~qca.__init__ ~qca.bayer ~qca.benchmark ~qca.coverage ~qca.emolecules ~qca.fda ~qca.pfizer ~qca.roche ================================================ FILE: docs/autosummary/espaloma.data.collection.rst ================================================ espaloma.data.collection ======================== .. automodule:: espaloma.data.collection .. rubric:: Functions .. autosummary:: :toctree: alkethoh esol md17_new md17_old zinc .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst qca ================================================ FILE: docs/autosummary/espaloma.data.collection.zinc.rst ================================================ espaloma.data.collection.zinc ============================= .. currentmodule:: espaloma.data.collection .. autofunction:: zinc ================================================ FILE: docs/autosummary/espaloma.data.dataset.Dataset.rst ================================================ espaloma.data.dataset.Dataset ============================= .. currentmodule:: espaloma.data.dataset .. autoclass:: Dataset :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~Dataset.__init__ ~Dataset.apply ~Dataset.load ~Dataset.save ~Dataset.shuffle ~Dataset.split ~Dataset.subsample ================================================ FILE: docs/autosummary/espaloma.data.dataset.GraphDataset.rst ================================================ espaloma.data.dataset.GraphDataset ================================== .. currentmodule:: espaloma.data.dataset .. autoclass:: GraphDataset :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~GraphDataset.__init__ ~GraphDataset.apply ~GraphDataset.batch ~GraphDataset.load ~GraphDataset.save ~GraphDataset.shuffle ~GraphDataset.split ~GraphDataset.subsample ~GraphDataset.view ================================================ FILE: docs/autosummary/espaloma.data.dataset.rst ================================================ espaloma.data.dataset ===================== .. automodule:: espaloma.data.dataset .. rubric:: Classes .. 
autosummary:: :toctree: :template: custom-class-template.rst Dataset GraphDataset ================================================ FILE: docs/autosummary/espaloma.data.md.MoleculeVacuumSimulation.rst ================================================ espaloma.data.md.MoleculeVacuumSimulation ========================================= .. currentmodule:: espaloma.data.md .. autoclass:: MoleculeVacuumSimulation :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~MoleculeVacuumSimulation.__init__ ~MoleculeVacuumSimulation.run ~MoleculeVacuumSimulation.simulation_from_graph ================================================ FILE: docs/autosummary/espaloma.data.md.rst ================================================ espaloma.data.md ================ .. automodule:: espaloma.data.md .. rubric:: Functions .. autosummary:: :toctree: subtract_nonbonded_force subtract_nonbonded_force_except_14 .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst MoleculeVacuumSimulation ================================================ FILE: docs/autosummary/espaloma.data.md.subtract_nonbonded_force.rst ================================================ espaloma.data.md.subtract\_nonbonded\_force =========================================== .. currentmodule:: espaloma.data.md .. autofunction:: subtract_nonbonded_force ================================================ FILE: docs/autosummary/espaloma.data.md.subtract_nonbonded_force_except_14.rst ================================================ espaloma.data.md.subtract\_nonbonded\_force\_except\_14 ======================================================= .. currentmodule:: espaloma.data.md .. autofunction:: subtract_nonbonded_force_except_14 ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.get_molecule.rst ================================================ espaloma.data.md17\_utils.get\_molecule ======================================= .. currentmodule:: espaloma.data.md17_utils .. autofunction:: get_molecule ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.realize_molecule.rst ================================================ espaloma.data.md17\_utils.realize\_molecule =========================================== .. currentmodule:: espaloma.data.md17_utils .. autofunction:: realize_molecule ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.rst ================================================ espaloma.data.md17\_utils ========================= .. automodule:: espaloma.data.md17_utils .. rubric:: Functions .. autosummary:: :toctree: get_molecule realize_molecule sum_offsets ================================================ FILE: docs/autosummary/espaloma.data.md17_utils.sum_offsets.rst ================================================ espaloma.data.md17\_utils.sum\_offsets ====================================== .. currentmodule:: espaloma.data.md17_utils .. autofunction:: sum_offsets ================================================ FILE: docs/autosummary/espaloma.data.normalize.BaseNormalize.rst ================================================ espaloma.data.normalize.BaseNormalize ===================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: BaseNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. 
autosummary:: ~BaseNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.DatasetLogNormalNormalize.rst ================================================ espaloma.data.normalize.DatasetLogNormalNormalize ================================================= .. currentmodule:: espaloma.data.normalize .. autoclass:: DatasetLogNormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~DatasetLogNormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.DatasetNormalNormalize.rst ================================================ espaloma.data.normalize.DatasetNormalNormalize ============================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: DatasetNormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~DatasetNormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.ESOL100LogNormalNormalize.rst ================================================ espaloma.data.normalize.ESOL100LogNormalNormalize ================================================= .. currentmodule:: espaloma.data.normalize .. autoclass:: ESOL100LogNormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~ESOL100LogNormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.ESOL100NormalNormalize.rst ================================================ espaloma.data.normalize.ESOL100NormalNormalize ============================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: ESOL100NormalNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~ESOL100NormalNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.NotNormalize.rst ================================================ espaloma.data.normalize.NotNormalize ==================================== .. currentmodule:: espaloma.data.normalize .. autoclass:: NotNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~NotNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.PositiveNotNormalize.rst ================================================ espaloma.data.normalize.PositiveNotNormalize ============================================ .. currentmodule:: espaloma.data.normalize .. autoclass:: PositiveNotNormalize :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~PositiveNotNormalize.__init__ ================================================ FILE: docs/autosummary/espaloma.data.normalize.rst ================================================ espaloma.data.normalize ======================= .. automodule:: espaloma.data.normalize .. rubric:: Classes .. 
autosummary:: :toctree: :template: custom-class-template.rst BaseNormalize DatasetLogNormalNormalize DatasetNormalNormalize ESOL100LogNormalNormalize ESOL100NormalNormalize NotNormalize PositiveNotNormalize ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.MolWithTargets.rst ================================================ espaloma.data.qcarchive\_utils.MolWithTargets ============================================= .. currentmodule:: espaloma.data.qcarchive_utils .. autoclass:: MolWithTargets :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~MolWithTargets.__init__ ~MolWithTargets.count ~MolWithTargets.index .. rubric:: Attributes .. autosummary:: ~MolWithTargets.energies ~MolWithTargets.gradients ~MolWithTargets.offmol ~MolWithTargets.xyz ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.breakdown_along_time_axis.rst ================================================ espaloma.data.qcarchive\_utils.breakdown\_along\_time\_axis =========================================================== .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: breakdown_along_time_axis ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.fetch_td_record.rst ================================================ espaloma.data.qcarchive\_utils.fetch\_td\_record ================================================ .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: fetch_td_record ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_client.rst ================================================ espaloma.data.qcarchive\_utils.get\_client ========================================== .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_client ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_collection.rst ================================================ espaloma.data.qcarchive\_utils.get\_collection ============================================== .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_collection ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_energy_and_gradient.rst ================================================ espaloma.data.qcarchive\_utils.get\_energy\_and\_gradient ========================================================= .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_energy_and_gradient ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.get_graph.rst ================================================ espaloma.data.qcarchive\_utils.get\_graph ========================================= .. currentmodule:: espaloma.data.qcarchive_utils .. autofunction:: get_graph ================================================ FILE: docs/autosummary/espaloma.data.qcarchive_utils.h5_to_dataset.rst ================================================ espaloma.data.qcarchive\_utils.h5\_to\_dataset ============================================== .. currentmodule:: espaloma.data.qcarchive_utils .. 
================================================
FILE: docs/autosummary/espaloma.data.qcarchive_utils.make_batch_size_consistent.rst
================================================
espaloma.data.qcarchive\_utils.make\_batch\_size\_consistent
============================================================

.. currentmodule:: espaloma.data.qcarchive_utils

.. autofunction:: make_batch_size_consistent

================================================
FILE: docs/autosummary/espaloma.data.qcarchive_utils.rst
================================================
espaloma.data.qcarchive\_utils
==============================

.. automodule:: espaloma.data.qcarchive_utils

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      breakdown_along_time_axis
      fetch_td_record
      get_client
      get_collection
      get_energy_and_gradient
      get_graph
      h5_to_dataset
      make_batch_size_consistent
      weight_by_snapshots

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      MolWithTargets

================================================
FILE: docs/autosummary/espaloma.data.qcarchive_utils.weight_by_snapshots.rst
================================================
espaloma.data.qcarchive\_utils.weight\_by\_snapshots
====================================================

.. currentmodule:: espaloma.data.qcarchive_utils

.. autofunction:: weight_by_snapshots

================================================
FILE: docs/autosummary/espaloma.data.rst
================================================
espaloma.data
=============

.. automodule:: espaloma.data

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.data.collection
      espaloma.data.dataset
      espaloma.data.md
      espaloma.data.md17_utils
      espaloma.data.normalize
      espaloma.data.qcarchive_utils
      espaloma.data.utils

================================================
FILE: docs/autosummary/espaloma.data.utils.batch.rst
================================================
espaloma.data.utils.batch
=========================

.. currentmodule:: espaloma.data.utils

.. autofunction:: batch

================================================
FILE: docs/autosummary/espaloma.data.utils.collate_fn.rst
================================================
espaloma.data.utils.collate\_fn
===============================

.. currentmodule:: espaloma.data.utils

.. autofunction:: collate_fn

================================================
FILE: docs/autosummary/espaloma.data.utils.from_csv.rst
================================================
espaloma.data.utils.from\_csv
=============================

.. currentmodule:: espaloma.data.utils

.. autofunction:: from_csv

================================================
FILE: docs/autosummary/espaloma.data.utils.infer_mol_from_coordinates.rst
================================================
espaloma.data.utils.infer\_mol\_from\_coordinates
=================================================

.. currentmodule:: espaloma.data.utils

.. autofunction:: infer_mol_from_coordinates

================================================
FILE: docs/autosummary/espaloma.data.utils.make_temp_directory.rst
================================================
espaloma.data.utils.make\_temp\_directory
=========================================

.. currentmodule:: espaloma.data.utils

.. autofunction:: make_temp_directory
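
A short usage sketch, assuming ``make_temp_directory`` is a context manager
that yields a scratch directory path and removes it on exit; verify this
against the docstring rendered above before relying on it:

.. code-block:: python

   from espaloma.data.utils import make_temp_directory

   with make_temp_directory() as tmp_dir:
       # write intermediate files into tmp_dir; it is cleaned up on exit
       print("scratch space at", tmp_dir)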
================================================
FILE: docs/autosummary/espaloma.data.utils.normalize.rst
================================================
espaloma.data.utils.normalize
=============================

.. currentmodule:: espaloma.data.utils

.. autofunction:: normalize

================================================
FILE: docs/autosummary/espaloma.data.utils.rst
================================================
espaloma.data.utils
===================

.. automodule:: espaloma.data.utils

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      batch
      collate_fn
      from_csv
      infer_mol_from_coordinates
      make_temp_directory
      normalize
      split
      sum_offsets

================================================
FILE: docs/autosummary/espaloma.data.utils.split.rst
================================================
espaloma.data.utils.split
=========================

.. currentmodule:: espaloma.data.utils

.. autofunction:: split

================================================
FILE: docs/autosummary/espaloma.data.utils.sum_offsets.rst
================================================
espaloma.data.utils.sum\_offsets
================================

.. currentmodule:: espaloma.data.utils

.. autofunction:: sum_offsets

================================================
FILE: docs/autosummary/espaloma.graphs.deploy.load_forcefield.rst
================================================
espaloma.graphs.deploy.load\_forcefield
=======================================

.. currentmodule:: espaloma.graphs.deploy

.. autofunction:: load_forcefield

================================================
FILE: docs/autosummary/espaloma.graphs.deploy.openmm_system_from_graph.rst
================================================
espaloma.graphs.deploy.openmm\_system\_from\_graph
==================================================

.. currentmodule:: espaloma.graphs.deploy

.. autofunction:: openmm_system_from_graph

================================================
FILE: docs/autosummary/espaloma.graphs.deploy.rst
================================================
espaloma.graphs.deploy
======================

.. automodule:: espaloma.graphs.deploy

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      load_forcefield
      openmm_system_from_graph

================================================
FILE: docs/autosummary/espaloma.graphs.graph.BaseGraph.rst
================================================
espaloma.graphs.graph.BaseGraph
===============================

.. currentmodule:: espaloma.graphs.graph

.. autoclass:: BaseGraph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~BaseGraph.__init__

================================================
FILE: docs/autosummary/espaloma.graphs.graph.Graph.rst
================================================
espaloma.graphs.graph.Graph
===========================

.. currentmodule:: espaloma.graphs.graph

.. autoclass:: Graph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~Graph.__init__
      ~Graph.get_heterograph_from_graph_and_mol
      ~Graph.get_homograph_from_mol
      ~Graph.load
      ~Graph.save

   .. rubric:: Attributes

   .. autosummary::

      ~Graph.edata
      ~Graph.ndata
      ~Graph.nodes

================================================
FILE: docs/autosummary/espaloma.graphs.graph.rst
================================================
espaloma.graphs.graph
=====================

.. automodule:: espaloma.graphs.graph

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      BaseGraph
      Graph
================================================
FILE: docs/autosummary/espaloma.graphs.legacy_force_field.LegacyForceField.rst
================================================
espaloma.graphs.legacy\_force\_field.LegacyForceField
=====================================================

.. currentmodule:: espaloma.graphs.legacy_force_field

.. autoclass:: LegacyForceField
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~LegacyForceField.__init__
      ~LegacyForceField.baseline_energy
      ~LegacyForceField.multi_typing
      ~LegacyForceField.parametrize
      ~LegacyForceField.typing

================================================
FILE: docs/autosummary/espaloma.graphs.legacy_force_field.rst
================================================
espaloma.graphs.legacy\_force\_field
====================================

.. automodule:: espaloma.graphs.legacy_force_field

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      LegacyForceField

================================================
FILE: docs/autosummary/espaloma.graphs.rst
================================================
espaloma.graphs
===============

.. automodule:: espaloma.graphs

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.graphs.deploy
      espaloma.graphs.graph
      espaloma.graphs.legacy_force_field
      espaloma.graphs.utils

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.angle_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.angle\_indices
====================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: angle_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.atom_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.atom\_indices
===================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: atom_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.bond_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.bond\_indices
===================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: bond_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.improper_torsion_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.improper\_torsion\_indices
================================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: improper_torsion_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.proper_torsion_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices.proper\_torsion\_indices
==============================================================

.. currentmodule:: espaloma.graphs.utils.offmol_indices

.. autofunction:: proper_torsion_indices
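
A short sketch of how these index helpers are typically used, assuming each
takes an OpenFF ``Molecule`` and returns an integer array with one row per
bond, angle, or torsion (verify the shapes against the docstrings above):

.. code-block:: python

   from openff.toolkit.topology import Molecule
   from espaloma.graphs.utils import offmol_indices

   offmol = Molecule.from_smiles("CCO")  # ethanol as a small example

   bonds = offmol_indices.bond_indices(offmol)              # (n_bonds, 2)
   angles = offmol_indices.angle_indices(offmol)            # (n_angles, 3)
   propers = offmol_indices.proper_torsion_indices(offmol)  # (n_propers, 4)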
================================================
FILE: docs/autosummary/espaloma.graphs.utils.offmol_indices.rst
================================================
espaloma.graphs.utils.offmol\_indices
=====================================

.. automodule:: espaloma.graphs.utils.offmol_indices

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle_indices
      atom_indices
      bond_indices
      improper_torsion_indices
      proper_torsion_indices

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.duplicate_index_ordering.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph.duplicate\_index\_ordering
===========================================================================

.. currentmodule:: espaloma.graphs.utils.read_heterogeneous_graph

.. autofunction:: duplicate_index_ordering

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.from_homogeneous_and_mol.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph.from\_homogeneous\_and\_mol
============================================================================

.. currentmodule:: espaloma.graphs.utils.read_heterogeneous_graph

.. autofunction:: from_homogeneous_and_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.relationship_indices_from_offmol.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph.relationship\_indices\_from\_offmol
====================================================================================

.. currentmodule:: espaloma.graphs.utils.read_heterogeneous_graph

.. autofunction:: relationship_indices_from_offmol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_heterogeneous_graph.rst
================================================
espaloma.graphs.utils.read\_heterogeneous\_graph
================================================

.. automodule:: espaloma.graphs.utils.read_heterogeneous_graph

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      duplicate_index_ordering
      from_homogeneous_and_mol
      relationship_indices_from_offmol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.fp_oe.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.fp\_oe
=====================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: fp_oe

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.fp_rdkit.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.fp\_rdkit
========================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: fp_rdkit

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.from_oemol.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.from\_oemol
==========================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: from_oemol
================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.from_openff_toolkit_mol.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.from\_openff\_toolkit\_mol
=========================================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: from_openff_toolkit_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.from_rdkit_mol.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph.from\_rdkit\_mol
===============================================================

.. currentmodule:: espaloma.graphs.utils.read_homogeneous_graph

.. autofunction:: from_rdkit_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.read_homogeneous_graph.rst
================================================
espaloma.graphs.utils.read\_homogeneous\_graph
==============================================

.. automodule:: espaloma.graphs.utils.read_homogeneous_graph

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      fp_oe
      fp_rdkit
      from_oemol
      from_openff_toolkit_mol
      from_rdkit_mol

================================================
FILE: docs/autosummary/espaloma.graphs.utils.rst
================================================
espaloma.graphs.utils
=====================

.. automodule:: espaloma.graphs.utils

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.graphs.utils.offmol_indices
      espaloma.graphs.utils.read_heterogeneous_graph
      espaloma.graphs.utils.read_homogeneous_graph

================================================
FILE: docs/autosummary/espaloma.mm.angle.angle_high.rst
================================================
espaloma.mm.angle.angle\_high
=============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: angle_high

================================================
FILE: docs/autosummary/espaloma.mm.angle.bond_angle.rst
================================================
espaloma.mm.angle.bond\_angle
=============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: bond_angle

================================================
FILE: docs/autosummary/espaloma.mm.angle.bond_bond.rst
================================================
espaloma.mm.angle.bond\_bond
============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: bond_bond

================================================
FILE: docs/autosummary/espaloma.mm.angle.harmonic_angle.rst
================================================
espaloma.mm.angle.harmonic\_angle
=================================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: harmonic_angle

================================================
FILE: docs/autosummary/espaloma.mm.angle.linear_mixture_angle.rst
================================================
espaloma.mm.angle.linear\_mixture\_angle
========================================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: linear_mixture_angle

================================================
FILE: docs/autosummary/espaloma.mm.angle.rst
================================================
espaloma.mm.angle
=================

.. automodule:: espaloma.mm.angle

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle_high
      bond_angle
      bond_bond
      harmonic_angle
      linear_mixture_angle
      urey_bradley
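
For orientation, a harmonic angle term has the familiar MM form
:math:`u(\theta) = \frac{k}{2}(\theta - \theta_0)^2`. The snippet below is a
generic PyTorch illustration of that functional form, not the exact signature
of ``harmonic_angle`` above:

.. code-block:: python

   import torch

   def harmonic_example(theta, k, eq):
       """Illustrative harmonic potential: 0.5 * k * (theta - eq) ** 2."""
       return 0.5 * k * (theta - eq) ** 2

   theta = torch.tensor(1.95)  # radians, near a tetrahedral angle
   energy = harmonic_example(theta, k=torch.tensor(100.0), eq=torch.tensor(1.91))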
================================================
FILE: docs/autosummary/espaloma.mm.angle.urey_bradley.rst
================================================
espaloma.mm.angle.urey\_bradley
===============================

.. currentmodule:: espaloma.mm.angle

.. autofunction:: urey_bradley

================================================
FILE: docs/autosummary/espaloma.mm.bond.bond_high.rst
================================================
espaloma.mm.bond.bond\_high
===========================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: bond_high

================================================
FILE: docs/autosummary/espaloma.mm.bond.gaussian_bond.rst
================================================
espaloma.mm.bond.gaussian\_bond
===============================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: gaussian_bond

================================================
FILE: docs/autosummary/espaloma.mm.bond.harmonic_bond.rst
================================================
espaloma.mm.bond.harmonic\_bond
===============================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: harmonic_bond

================================================
FILE: docs/autosummary/espaloma.mm.bond.linear_mixture_bond.rst
================================================
espaloma.mm.bond.linear\_mixture\_bond
======================================

.. currentmodule:: espaloma.mm.bond

.. autofunction:: linear_mixture_bond

================================================
FILE: docs/autosummary/espaloma.mm.bond.rst
================================================
espaloma.mm.bond
================

.. automodule:: espaloma.mm.bond

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      bond_high
      gaussian_bond
      harmonic_bond
      linear_mixture_bond

================================================
FILE: docs/autosummary/espaloma.mm.energy.CarryII.rst
================================================
espaloma.mm.energy.CarryII
==========================

.. currentmodule:: espaloma.mm.energy

.. autoclass:: CarryII
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~CarryII.__init__
      ~CarryII.add_module
      ~CarryII.apply
      ~CarryII.bfloat16
      ~CarryII.buffers
      ~CarryII.children
      ~CarryII.cpu
      ~CarryII.cuda
      ~CarryII.double
      ~CarryII.eval
      ~CarryII.extra_repr
      ~CarryII.float
      ~CarryII.forward
      ~CarryII.half
      ~CarryII.load_state_dict
      ~CarryII.modules
      ~CarryII.named_buffers
      ~CarryII.named_children
      ~CarryII.named_modules
      ~CarryII.named_parameters
      ~CarryII.parameters
      ~CarryII.register_backward_hook
      ~CarryII.register_buffer
      ~CarryII.register_forward_hook
      ~CarryII.register_forward_pre_hook
      ~CarryII.register_parameter
      ~CarryII.requires_grad_
      ~CarryII.share_memory
      ~CarryII.state_dict
      ~CarryII.to
      ~CarryII.train
      ~CarryII.type
      ~CarryII.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~CarryII.T_destination
      ~CarryII.dump_patches

================================================
FILE: docs/autosummary/espaloma.mm.energy.EnergyInGraph.rst
================================================
espaloma.mm.energy.EnergyInGraph
================================

.. currentmodule:: espaloma.mm.energy

.. autoclass:: EnergyInGraph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~EnergyInGraph.__init__
      ~EnergyInGraph.add_module
      ~EnergyInGraph.apply
      ~EnergyInGraph.bfloat16
      ~EnergyInGraph.buffers
      ~EnergyInGraph.children
      ~EnergyInGraph.cpu
      ~EnergyInGraph.cuda
      ~EnergyInGraph.double
      ~EnergyInGraph.eval
      ~EnergyInGraph.extra_repr
      ~EnergyInGraph.float
      ~EnergyInGraph.forward
      ~EnergyInGraph.half
      ~EnergyInGraph.load_state_dict
      ~EnergyInGraph.modules
      ~EnergyInGraph.named_buffers
      ~EnergyInGraph.named_children
      ~EnergyInGraph.named_modules
      ~EnergyInGraph.named_parameters
      ~EnergyInGraph.parameters
      ~EnergyInGraph.register_backward_hook
      ~EnergyInGraph.register_buffer
      ~EnergyInGraph.register_forward_hook
      ~EnergyInGraph.register_forward_pre_hook
      ~EnergyInGraph.register_parameter
      ~EnergyInGraph.requires_grad_
      ~EnergyInGraph.share_memory
      ~EnergyInGraph.state_dict
      ~EnergyInGraph.to
      ~EnergyInGraph.train
      ~EnergyInGraph.type
      ~EnergyInGraph.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~EnergyInGraph.T_destination
      ~EnergyInGraph.dump_patches
================================================
FILE: docs/autosummary/espaloma.mm.energy.EnergyInGraphII.rst
================================================
espaloma.mm.energy.EnergyInGraphII
==================================

.. currentmodule:: espaloma.mm.energy

.. autoclass:: EnergyInGraphII
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~EnergyInGraphII.__init__
      ~EnergyInGraphII.add_module
      ~EnergyInGraphII.apply
      ~EnergyInGraphII.bfloat16
      ~EnergyInGraphII.buffers
      ~EnergyInGraphII.children
      ~EnergyInGraphII.cpu
      ~EnergyInGraphII.cuda
      ~EnergyInGraphII.double
      ~EnergyInGraphII.eval
      ~EnergyInGraphII.extra_repr
      ~EnergyInGraphII.float
      ~EnergyInGraphII.forward
      ~EnergyInGraphII.half
      ~EnergyInGraphII.load_state_dict
      ~EnergyInGraphII.modules
      ~EnergyInGraphII.named_buffers
      ~EnergyInGraphII.named_children
      ~EnergyInGraphII.named_modules
      ~EnergyInGraphII.named_parameters
      ~EnergyInGraphII.parameters
      ~EnergyInGraphII.register_backward_hook
      ~EnergyInGraphII.register_buffer
      ~EnergyInGraphII.register_forward_hook
      ~EnergyInGraphII.register_forward_pre_hook
      ~EnergyInGraphII.register_parameter
      ~EnergyInGraphII.requires_grad_
      ~EnergyInGraphII.share_memory
      ~EnergyInGraphII.state_dict
      ~EnergyInGraphII.to
      ~EnergyInGraphII.train
      ~EnergyInGraphII.type
      ~EnergyInGraphII.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~EnergyInGraphII.T_destination
      ~EnergyInGraphII.dump_patches

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_angle.rst
================================================
espaloma.mm.energy.apply\_angle
===============================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_angle

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_angle_ii.rst
================================================
espaloma.mm.energy.apply\_angle\_ii
===================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_angle_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_angle_linear_mixture.rst
================================================
espaloma.mm.energy.apply\_angle\_linear\_mixture
================================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_angle_linear_mixture
================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond.rst
================================================
espaloma.mm.energy.apply\_bond
==============================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond_gaussian.rst
================================================
espaloma.mm.energy.apply\_bond\_gaussian
========================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond_gaussian

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond_ii.rst
================================================
espaloma.mm.energy.apply\_bond\_ii
==================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_bond_linear_mixture.rst
================================================
espaloma.mm.energy.apply\_bond\_linear\_mixture
===============================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_bond_linear_mixture

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_improper_torsion.rst
================================================
espaloma.mm.energy.apply\_improper\_torsion
===========================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_improper_torsion

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_nonbonded.rst
================================================
espaloma.mm.energy.apply\_nonbonded
===================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_nonbonded

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_torsion.rst
================================================
espaloma.mm.energy.apply\_torsion
=================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_torsion

================================================
FILE: docs/autosummary/espaloma.mm.energy.apply_torsion_ii.rst
================================================
espaloma.mm.energy.apply\_torsion\_ii
=====================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: apply_torsion_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.energy_in_graph.rst
================================================
espaloma.mm.energy.energy\_in\_graph
====================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: energy_in_graph

================================================
FILE: docs/autosummary/espaloma.mm.energy.energy_in_graph_ii.rst
================================================
espaloma.mm.energy.energy\_in\_graph\_ii
========================================

.. currentmodule:: espaloma.mm.energy

.. autofunction:: energy_in_graph_ii

================================================
FILE: docs/autosummary/espaloma.mm.energy.rst
================================================
espaloma.mm.energy
==================

.. automodule:: espaloma.mm.energy

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      apply_angle
      apply_angle_ii
      apply_angle_linear_mixture
      apply_bond
      apply_bond_gaussian
      apply_bond_ii
      apply_bond_linear_mixture
      apply_improper_torsion
      apply_nonbonded
      apply_torsion
      apply_torsion_ii
      energy_in_graph
      energy_in_graph_ii

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      CarryII
      EnergyInGraph
      EnergyInGraphII
================================================
FILE: docs/autosummary/espaloma.mm.functional.gaussian.rst
================================================
espaloma.mm.functional.gaussian
===============================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: gaussian

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic.rst
================================================
espaloma.mm.functional.harmonic
===============================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic_harmonic_coupled.rst
================================================
espaloma.mm.functional.harmonic\_harmonic\_coupled
==================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic_harmonic_coupled

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic_harmonic_periodic_coupled.rst
================================================
espaloma.mm.functional.harmonic\_harmonic\_periodic\_coupled
============================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic_harmonic_periodic_coupled

================================================
FILE: docs/autosummary/espaloma.mm.functional.harmonic_periodic_coupled.rst
================================================
espaloma.mm.functional.harmonic\_periodic\_coupled
==================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: harmonic_periodic_coupled

================================================
FILE: docs/autosummary/espaloma.mm.functional.linear_mixture.rst
================================================
espaloma.mm.functional.linear\_mixture
======================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: linear_mixture

================================================
FILE: docs/autosummary/espaloma.mm.functional.linear_mixture_to_original.rst
================================================
espaloma.mm.functional.linear\_mixture\_to\_original
====================================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: linear_mixture_to_original

================================================
FILE: docs/autosummary/espaloma.mm.functional.lj.rst
================================================
espaloma.mm.functional.lj
=========================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: lj

================================================
FILE: docs/autosummary/espaloma.mm.functional.periodic.rst
================================================
espaloma.mm.functional.periodic
===============================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: periodic
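
For orientation, the periodic torsion form used in MM force fields is
:math:`u(\phi) = \sum_n k_n \left(1 + \cos(n\phi - \phi_n)\right)`. Below is a
generic PyTorch illustration of that sum, not the exact signature of
``periodic`` above:

.. code-block:: python

   import torch

   def periodic_example(phi, ks, periodicities, phases):
       """Illustrative periodic torsion: sum_n k_n * (1 + cos(n*phi - phase_n))."""
       return (ks * (1.0 + torch.cos(periodicities * phi - phases))).sum(-1)

   phi = torch.tensor(0.5)  # dihedral angle in radians
   energy = periodic_example(
       phi,
       ks=torch.tensor([1.0, 0.5]),
       periodicities=torch.tensor([1.0, 2.0]),
       phases=torch.tensor([0.0, 3.14159]),
   )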
================================================
FILE: docs/autosummary/espaloma.mm.functional.periodic_fixed_phases.rst
================================================
espaloma.mm.functional.periodic\_fixed\_phases
==============================================

.. currentmodule:: espaloma.mm.functional

.. autofunction:: periodic_fixed_phases

================================================
FILE: docs/autosummary/espaloma.mm.functional.rst
================================================
espaloma.mm.functional
======================

.. automodule:: espaloma.mm.functional

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      gaussian
      harmonic
      harmonic_harmonic_coupled
      harmonic_harmonic_periodic_coupled
      harmonic_periodic_coupled
      linear_mixture
      linear_mixture_to_original
      lj
      periodic
      periodic_fixed_phases

================================================
FILE: docs/autosummary/espaloma.mm.geometry.GeometryInGraph.rst
================================================
espaloma.mm.geometry.GeometryInGraph
====================================

.. currentmodule:: espaloma.mm.geometry

.. autoclass:: GeometryInGraph
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~GeometryInGraph.__init__
      ~GeometryInGraph.add_module
      ~GeometryInGraph.apply
      ~GeometryInGraph.bfloat16
      ~GeometryInGraph.buffers
      ~GeometryInGraph.children
      ~GeometryInGraph.cpu
      ~GeometryInGraph.cuda
      ~GeometryInGraph.double
      ~GeometryInGraph.eval
      ~GeometryInGraph.extra_repr
      ~GeometryInGraph.float
      ~GeometryInGraph.forward
      ~GeometryInGraph.half
      ~GeometryInGraph.load_state_dict
      ~GeometryInGraph.modules
      ~GeometryInGraph.named_buffers
      ~GeometryInGraph.named_children
      ~GeometryInGraph.named_modules
      ~GeometryInGraph.named_parameters
      ~GeometryInGraph.parameters
      ~GeometryInGraph.register_backward_hook
      ~GeometryInGraph.register_buffer
      ~GeometryInGraph.register_forward_hook
      ~GeometryInGraph.register_forward_pre_hook
      ~GeometryInGraph.register_parameter
      ~GeometryInGraph.requires_grad_
      ~GeometryInGraph.share_memory
      ~GeometryInGraph.state_dict
      ~GeometryInGraph.to
      ~GeometryInGraph.train
      ~GeometryInGraph.type
      ~GeometryInGraph.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~GeometryInGraph.T_destination
      ~GeometryInGraph.dump_patches

================================================
FILE: docs/autosummary/espaloma.mm.geometry.angle.rst
================================================
espaloma.mm.geometry.angle
==========================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: angle

================================================
FILE: docs/autosummary/espaloma.mm.geometry.apply_angle.rst
================================================
espaloma.mm.geometry.apply\_angle
=================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: apply_angle

================================================
FILE: docs/autosummary/espaloma.mm.geometry.apply_bond.rst
================================================
espaloma.mm.geometry.apply\_bond
================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: apply_bond

================================================
FILE: docs/autosummary/espaloma.mm.geometry.apply_torsion.rst
================================================
espaloma.mm.geometry.apply\_torsion
===================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: apply_torsion
================================================
FILE: docs/autosummary/espaloma.mm.geometry.copy_src.rst
================================================
espaloma.mm.geometry.copy\_src
==============================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: copy_src

================================================
FILE: docs/autosummary/espaloma.mm.geometry.dihedral.rst
================================================
espaloma.mm.geometry.dihedral
=============================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: dihedral

================================================
FILE: docs/autosummary/espaloma.mm.geometry.distance.rst
================================================
espaloma.mm.geometry.distance
=============================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: distance

================================================
FILE: docs/autosummary/espaloma.mm.geometry.geometry_in_graph.rst
================================================
espaloma.mm.geometry.geometry\_in\_graph
========================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: geometry_in_graph

================================================
FILE: docs/autosummary/espaloma.mm.geometry.reduce_stack.rst
================================================
espaloma.mm.geometry.reduce\_stack
==================================

.. currentmodule:: espaloma.mm.geometry

.. autofunction:: reduce_stack

================================================
FILE: docs/autosummary/espaloma.mm.geometry.rst
================================================
espaloma.mm.geometry
====================

.. automodule:: espaloma.mm.geometry

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle
      apply_angle
      apply_bond
      apply_torsion
      copy_src
      dihedral
      distance
      geometry_in_graph
      reduce_stack

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      GeometryInGraph

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.arithmetic_mean.rst
================================================
espaloma.mm.nonbonded.arithmetic\_mean
======================================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: arithmetic_mean

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.geometric_mean.rst
================================================
espaloma.mm.nonbonded.geometric\_mean
=====================================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: geometric_mean

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.lj_12_6.rst
================================================
espaloma.mm.nonbonded.lj\_12\_6
===============================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: lj_12_6

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.lj_9_6.rst
================================================
espaloma.mm.nonbonded.lj\_9\_6
==============================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: lj_9_6

================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.lorentz_berthelot.rst
================================================
espaloma.mm.nonbonded.lorentz\_berthelot
========================================

.. currentmodule:: espaloma.mm.nonbonded

.. autofunction:: lorentz_berthelot
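
For orientation, Lorentz-Berthelot combining rules build pairwise
Lennard-Jones parameters from per-atom ones:
:math:`\sigma_{ij} = (\sigma_i + \sigma_j)/2` (arithmetic mean) and
:math:`\epsilon_{ij} = \sqrt{\epsilon_i \epsilon_j}` (geometric mean). A
generic PyTorch illustration of those rules, not the exact signature of
``lorentz_berthelot`` above:

.. code-block:: python

   import torch

   def lorentz_berthelot_example(sigma_i, sigma_j, epsilon_i, epsilon_j):
       """Illustrative combining rules for pairwise Lennard-Jones parameters."""
       sigma_ij = 0.5 * (sigma_i + sigma_j)            # arithmetic mean
       epsilon_ij = torch.sqrt(epsilon_i * epsilon_j)  # geometric mean
       return sigma_ij, epsilon_ij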
================================================
FILE: docs/autosummary/espaloma.mm.nonbonded.rst
================================================
espaloma.mm.nonbonded
=====================

.. automodule:: espaloma.mm.nonbonded

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      arithmetic_mean
      geometric_mean
      lj_12_6
      lj_9_6
      lorentz_berthelot

================================================
FILE: docs/autosummary/espaloma.mm.rst
================================================
espaloma.mm
===========

.. automodule:: espaloma.mm

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.mm.angle
      espaloma.mm.bond
      espaloma.mm.energy
      espaloma.mm.functional
      espaloma.mm.geometry
      espaloma.mm.nonbonded
      espaloma.mm.torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.angle_angle.rst
================================================
espaloma.mm.torsion.angle\_angle
================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: angle_angle

================================================
FILE: docs/autosummary/espaloma.mm.torsion.angle_angle_torsion.rst
================================================
espaloma.mm.torsion.angle\_angle\_torsion
=========================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: angle_angle_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.angle_torsion.rst
================================================
espaloma.mm.torsion.angle\_torsion
==================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: angle_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.bond_torsion.rst
================================================
espaloma.mm.torsion.bond\_torsion
=================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: bond_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.periodic_torsion.rst
================================================
espaloma.mm.torsion.periodic\_torsion
=====================================

.. currentmodule:: espaloma.mm.torsion

.. autofunction:: periodic_torsion

================================================
FILE: docs/autosummary/espaloma.mm.torsion.rst
================================================
espaloma.mm.torsion
===================

.. automodule:: espaloma.mm.torsion

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      angle_angle
      angle_angle_torsion
      angle_torsion
      bond_torsion
      periodic_torsion

================================================
FILE: docs/autosummary/espaloma.nn.baselines.FreeParameterBaseline.rst
================================================
espaloma.nn.baselines.FreeParameterBaseline
===========================================

.. currentmodule:: espaloma.nn.baselines

.. autoclass:: FreeParameterBaseline
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~FreeParameterBaseline.__init__
      ~FreeParameterBaseline.add_module
      ~FreeParameterBaseline.apply
      ~FreeParameterBaseline.bfloat16
      ~FreeParameterBaseline.buffers
      ~FreeParameterBaseline.children
      ~FreeParameterBaseline.cpu
      ~FreeParameterBaseline.cuda
      ~FreeParameterBaseline.double
      ~FreeParameterBaseline.eval
      ~FreeParameterBaseline.extra_repr
      ~FreeParameterBaseline.float
      ~FreeParameterBaseline.forward
      ~FreeParameterBaseline.half
      ~FreeParameterBaseline.load_state_dict
      ~FreeParameterBaseline.modules
      ~FreeParameterBaseline.named_buffers
      ~FreeParameterBaseline.named_children
      ~FreeParameterBaseline.named_modules
      ~FreeParameterBaseline.named_parameters
      ~FreeParameterBaseline.parameters
      ~FreeParameterBaseline.register_backward_hook
      ~FreeParameterBaseline.register_buffer
      ~FreeParameterBaseline.register_forward_hook
      ~FreeParameterBaseline.register_forward_pre_hook
      ~FreeParameterBaseline.register_parameter
      ~FreeParameterBaseline.requires_grad_
      ~FreeParameterBaseline.share_memory
      ~FreeParameterBaseline.state_dict
      ~FreeParameterBaseline.to
      ~FreeParameterBaseline.train
      ~FreeParameterBaseline.type
      ~FreeParameterBaseline.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~FreeParameterBaseline.T_destination
      ~FreeParameterBaseline.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.baselines.FreeParameterBaselineInitMean.rst
================================================
espaloma.nn.baselines.FreeParameterBaselineInitMean
===================================================

.. currentmodule:: espaloma.nn.baselines

.. autoclass:: FreeParameterBaselineInitMean
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~FreeParameterBaselineInitMean.__init__
      ~FreeParameterBaselineInitMean.add_module
      ~FreeParameterBaselineInitMean.apply
      ~FreeParameterBaselineInitMean.bfloat16
      ~FreeParameterBaselineInitMean.buffers
      ~FreeParameterBaselineInitMean.children
      ~FreeParameterBaselineInitMean.cpu
      ~FreeParameterBaselineInitMean.cuda
      ~FreeParameterBaselineInitMean.double
      ~FreeParameterBaselineInitMean.eval
      ~FreeParameterBaselineInitMean.extra_repr
      ~FreeParameterBaselineInitMean.float
      ~FreeParameterBaselineInitMean.forward
      ~FreeParameterBaselineInitMean.half
      ~FreeParameterBaselineInitMean.load_state_dict
      ~FreeParameterBaselineInitMean.modules
      ~FreeParameterBaselineInitMean.named_buffers
      ~FreeParameterBaselineInitMean.named_children
      ~FreeParameterBaselineInitMean.named_modules
      ~FreeParameterBaselineInitMean.named_parameters
      ~FreeParameterBaselineInitMean.parameters
      ~FreeParameterBaselineInitMean.register_backward_hook
      ~FreeParameterBaselineInitMean.register_buffer
      ~FreeParameterBaselineInitMean.register_forward_hook
      ~FreeParameterBaselineInitMean.register_forward_pre_hook
      ~FreeParameterBaselineInitMean.register_parameter
      ~FreeParameterBaselineInitMean.requires_grad_
      ~FreeParameterBaselineInitMean.share_memory
      ~FreeParameterBaselineInitMean.state_dict
      ~FreeParameterBaselineInitMean.to
      ~FreeParameterBaselineInitMean.train
      ~FreeParameterBaselineInitMean.type
      ~FreeParameterBaselineInitMean.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~FreeParameterBaselineInitMean.T_destination
      ~FreeParameterBaselineInitMean.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.baselines.rst
================================================
espaloma.nn.baselines
=====================

.. automodule:: espaloma.nn.baselines

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      FreeParameterBaseline
      FreeParameterBaselineInitMean
================================================
FILE: docs/autosummary/espaloma.nn.layers.dgl_legacy.GN.rst
================================================
espaloma.nn.layers.dgl\_legacy.gn
=================================

.. currentmodule:: espaloma.nn.layers.dgl_legacy

.. autofunction:: gn

================================================
FILE: docs/autosummary/espaloma.nn.layers.dgl_legacy.rst
================================================
espaloma.nn.layers.dgl\_legacy
==============================

.. automodule:: espaloma.nn.layers.dgl_legacy

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      gn

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      GN

================================================
FILE: docs/autosummary/espaloma.nn.layers.rst
================================================
espaloma.nn.layers
==================

.. automodule:: espaloma.nn.layers

   .. rubric:: Modules

   .. autosummary::
      :toctree:
      :template: custom-module-template.rst
      :recursive:

      espaloma.nn.layers.dgl_legacy

================================================
FILE: docs/autosummary/espaloma.nn.readout.base_readout.BaseReadout.rst
================================================
espaloma.nn.readout.base\_readout.BaseReadout
=============================================

.. currentmodule:: espaloma.nn.readout.base_readout

.. autoclass:: BaseReadout
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~BaseReadout.__init__
      ~BaseReadout.add_module
      ~BaseReadout.apply
      ~BaseReadout.bfloat16
      ~BaseReadout.buffers
      ~BaseReadout.children
      ~BaseReadout.cpu
      ~BaseReadout.cuda
      ~BaseReadout.double
      ~BaseReadout.eval
      ~BaseReadout.extra_repr
      ~BaseReadout.float
      ~BaseReadout.forward
      ~BaseReadout.half
      ~BaseReadout.load_state_dict
      ~BaseReadout.modules
      ~BaseReadout.named_buffers
      ~BaseReadout.named_children
      ~BaseReadout.named_modules
      ~BaseReadout.named_parameters
      ~BaseReadout.parameters
      ~BaseReadout.register_backward_hook
      ~BaseReadout.register_buffer
      ~BaseReadout.register_forward_hook
      ~BaseReadout.register_forward_pre_hook
      ~BaseReadout.register_parameter
      ~BaseReadout.requires_grad_
      ~BaseReadout.share_memory
      ~BaseReadout.state_dict
      ~BaseReadout.to
      ~BaseReadout.train
      ~BaseReadout.type
      ~BaseReadout.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~BaseReadout.T_destination
      ~BaseReadout.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.readout.base_readout.rst
================================================
espaloma.nn.readout.base\_readout
=================================

.. automodule:: espaloma.nn.readout.base_readout

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      BaseReadout

================================================
FILE: docs/autosummary/espaloma.nn.readout.charge_equilibrium.ChargeEquilibrium.rst
================================================
espaloma.nn.readout.charge\_equilibrium.ChargeEquilibrium
=========================================================

.. currentmodule:: espaloma.nn.readout.charge_equilibrium

.. autoclass:: ChargeEquilibrium
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~ChargeEquilibrium.__init__
      ~ChargeEquilibrium.add_module
      ~ChargeEquilibrium.apply
      ~ChargeEquilibrium.bfloat16
      ~ChargeEquilibrium.buffers
      ~ChargeEquilibrium.children
      ~ChargeEquilibrium.cpu
      ~ChargeEquilibrium.cuda
      ~ChargeEquilibrium.double
      ~ChargeEquilibrium.eval
      ~ChargeEquilibrium.extra_repr
      ~ChargeEquilibrium.float
      ~ChargeEquilibrium.forward
      ~ChargeEquilibrium.half
      ~ChargeEquilibrium.load_state_dict
      ~ChargeEquilibrium.modules
      ~ChargeEquilibrium.named_buffers
      ~ChargeEquilibrium.named_children
      ~ChargeEquilibrium.named_modules
      ~ChargeEquilibrium.named_parameters
      ~ChargeEquilibrium.parameters
      ~ChargeEquilibrium.register_backward_hook
      ~ChargeEquilibrium.register_buffer
      ~ChargeEquilibrium.register_forward_hook
      ~ChargeEquilibrium.register_forward_pre_hook
      ~ChargeEquilibrium.register_parameter
      ~ChargeEquilibrium.requires_grad_
      ~ChargeEquilibrium.share_memory
      ~ChargeEquilibrium.state_dict
      ~ChargeEquilibrium.to
      ~ChargeEquilibrium.train
      ~ChargeEquilibrium.type
      ~ChargeEquilibrium.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~ChargeEquilibrium.T_destination
      ~ChargeEquilibrium.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.readout.charge_equilibrium.get_charges.rst
================================================
espaloma.nn.readout.charge\_equilibrium.get\_charges
====================================================

.. currentmodule:: espaloma.nn.readout.charge_equilibrium

.. autofunction:: get_charges

================================================
FILE: docs/autosummary/espaloma.nn.readout.charge_equilibrium.rst
================================================
espaloma.nn.readout.charge\_equilibrium
=======================================

.. automodule:: espaloma.nn.readout.charge_equilibrium

   .. rubric:: Functions

   .. autosummary::
      :toctree:

      get_charges

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      ChargeEquilibrium

================================================
FILE: docs/autosummary/espaloma.nn.readout.graph_level_readout.GraphLevelReadout.rst
================================================
espaloma.nn.readout.graph\_level\_readout.GraphLevelReadout
===========================================================

.. currentmodule:: espaloma.nn.readout.graph_level_readout

.. autoclass:: GraphLevelReadout
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~GraphLevelReadout.__init__
      ~GraphLevelReadout.add_module
      ~GraphLevelReadout.apply
      ~GraphLevelReadout.bfloat16
      ~GraphLevelReadout.buffers
      ~GraphLevelReadout.children
      ~GraphLevelReadout.cpu
      ~GraphLevelReadout.cuda
      ~GraphLevelReadout.double
      ~GraphLevelReadout.eval
      ~GraphLevelReadout.extra_repr
      ~GraphLevelReadout.float
      ~GraphLevelReadout.forward
      ~GraphLevelReadout.half
      ~GraphLevelReadout.load_state_dict
      ~GraphLevelReadout.modules
      ~GraphLevelReadout.named_buffers
      ~GraphLevelReadout.named_children
      ~GraphLevelReadout.named_modules
      ~GraphLevelReadout.named_parameters
      ~GraphLevelReadout.parameters
      ~GraphLevelReadout.register_backward_hook
      ~GraphLevelReadout.register_buffer
      ~GraphLevelReadout.register_forward_hook
      ~GraphLevelReadout.register_forward_pre_hook
      ~GraphLevelReadout.register_parameter
      ~GraphLevelReadout.requires_grad_
      ~GraphLevelReadout.share_memory
      ~GraphLevelReadout.state_dict
      ~GraphLevelReadout.to
      ~GraphLevelReadout.train
      ~GraphLevelReadout.type
      ~GraphLevelReadout.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~GraphLevelReadout.T_destination
      ~GraphLevelReadout.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.readout.graph_level_readout.rst
================================================
espaloma.nn.readout.graph\_level\_readout
=========================================

.. automodule:: espaloma.nn.readout.graph_level_readout

   .. rubric:: Classes

   .. autosummary::
      :toctree:
      :template: custom-class-template.rst

      GraphLevelReadout

================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.ExpCoefficients.rst
================================================
espaloma.nn.readout.janossy.ExpCoefficients
===========================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: ExpCoefficients
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~ExpCoefficients.__init__
      ~ExpCoefficients.add_module
      ~ExpCoefficients.apply
      ~ExpCoefficients.bfloat16
      ~ExpCoefficients.buffers
      ~ExpCoefficients.children
      ~ExpCoefficients.cpu
      ~ExpCoefficients.cuda
      ~ExpCoefficients.double
      ~ExpCoefficients.eval
      ~ExpCoefficients.extra_repr
      ~ExpCoefficients.float
      ~ExpCoefficients.forward
      ~ExpCoefficients.half
      ~ExpCoefficients.load_state_dict
      ~ExpCoefficients.modules
      ~ExpCoefficients.named_buffers
      ~ExpCoefficients.named_children
      ~ExpCoefficients.named_modules
      ~ExpCoefficients.named_parameters
      ~ExpCoefficients.parameters
      ~ExpCoefficients.register_backward_hook
      ~ExpCoefficients.register_buffer
      ~ExpCoefficients.register_forward_hook
      ~ExpCoefficients.register_forward_pre_hook
      ~ExpCoefficients.register_parameter
      ~ExpCoefficients.requires_grad_
      ~ExpCoefficients.share_memory
      ~ExpCoefficients.state_dict
      ~ExpCoefficients.to
      ~ExpCoefficients.train
      ~ExpCoefficients.type
      ~ExpCoefficients.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~ExpCoefficients.T_destination
      ~ExpCoefficients.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.JanossyPooling.rst
================================================
espaloma.nn.readout.janossy.JanossyPooling
==========================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: JanossyPooling
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~JanossyPooling.__init__
      ~JanossyPooling.add_module
      ~JanossyPooling.apply
      ~JanossyPooling.bfloat16
      ~JanossyPooling.buffers
      ~JanossyPooling.children
      ~JanossyPooling.cpu
      ~JanossyPooling.cuda
      ~JanossyPooling.double
      ~JanossyPooling.eval
      ~JanossyPooling.extra_repr
      ~JanossyPooling.float
      ~JanossyPooling.forward
      ~JanossyPooling.half
      ~JanossyPooling.load_state_dict
      ~JanossyPooling.modules
      ~JanossyPooling.named_buffers
      ~JanossyPooling.named_children
      ~JanossyPooling.named_modules
      ~JanossyPooling.named_parameters
      ~JanossyPooling.parameters
      ~JanossyPooling.register_backward_hook
      ~JanossyPooling.register_buffer
      ~JanossyPooling.register_forward_hook
      ~JanossyPooling.register_forward_pre_hook
      ~JanossyPooling.register_parameter
      ~JanossyPooling.requires_grad_
      ~JanossyPooling.share_memory
      ~JanossyPooling.state_dict
      ~JanossyPooling.to
      ~JanossyPooling.train
      ~JanossyPooling.type
      ~JanossyPooling.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~JanossyPooling.T_destination
      ~JanossyPooling.dump_patches
================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.JanossyPoolingImproper.rst
================================================
espaloma.nn.readout.janossy.JanossyPoolingImproper
==================================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: JanossyPoolingImproper
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~JanossyPoolingImproper.__init__
      ~JanossyPoolingImproper.add_module
      ~JanossyPoolingImproper.apply
      ~JanossyPoolingImproper.bfloat16
      ~JanossyPoolingImproper.buffers
      ~JanossyPoolingImproper.children
      ~JanossyPoolingImproper.cpu
      ~JanossyPoolingImproper.cuda
      ~JanossyPoolingImproper.double
      ~JanossyPoolingImproper.eval
      ~JanossyPoolingImproper.extra_repr
      ~JanossyPoolingImproper.float
      ~JanossyPoolingImproper.forward
      ~JanossyPoolingImproper.half
      ~JanossyPoolingImproper.load_state_dict
      ~JanossyPoolingImproper.modules
      ~JanossyPoolingImproper.named_buffers
      ~JanossyPoolingImproper.named_children
      ~JanossyPoolingImproper.named_modules
      ~JanossyPoolingImproper.named_parameters
      ~JanossyPoolingImproper.parameters
      ~JanossyPoolingImproper.register_backward_hook
      ~JanossyPoolingImproper.register_buffer
      ~JanossyPoolingImproper.register_forward_hook
      ~JanossyPoolingImproper.register_forward_pre_hook
      ~JanossyPoolingImproper.register_parameter
      ~JanossyPoolingImproper.requires_grad_
      ~JanossyPoolingImproper.share_memory
      ~JanossyPoolingImproper.state_dict
      ~JanossyPoolingImproper.to
      ~JanossyPoolingImproper.train
      ~JanossyPoolingImproper.type
      ~JanossyPoolingImproper.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~JanossyPoolingImproper.T_destination
      ~JanossyPoolingImproper.dump_patches

================================================
FILE: docs/autosummary/espaloma.nn.readout.janossy.JanossyPoolingNonbonded.rst
================================================
espaloma.nn.readout.janossy.JanossyPoolingNonbonded
===================================================

.. currentmodule:: espaloma.nn.readout.janossy

.. autoclass:: JanossyPoolingNonbonded
   :members:
   :show-inheritance:
   :inherited-members:

   .. automethod:: __init__

   .. rubric:: Methods

   .. autosummary::

      ~JanossyPoolingNonbonded.__init__
      ~JanossyPoolingNonbonded.add_module
      ~JanossyPoolingNonbonded.apply
      ~JanossyPoolingNonbonded.bfloat16
      ~JanossyPoolingNonbonded.buffers
      ~JanossyPoolingNonbonded.children
      ~JanossyPoolingNonbonded.cpu
      ~JanossyPoolingNonbonded.cuda
      ~JanossyPoolingNonbonded.double
      ~JanossyPoolingNonbonded.eval
      ~JanossyPoolingNonbonded.extra_repr
      ~JanossyPoolingNonbonded.float
      ~JanossyPoolingNonbonded.forward
      ~JanossyPoolingNonbonded.half
      ~JanossyPoolingNonbonded.load_state_dict
      ~JanossyPoolingNonbonded.modules
      ~JanossyPoolingNonbonded.named_buffers
      ~JanossyPoolingNonbonded.named_children
      ~JanossyPoolingNonbonded.named_modules
      ~JanossyPoolingNonbonded.named_parameters
      ~JanossyPoolingNonbonded.parameters
      ~JanossyPoolingNonbonded.register_backward_hook
      ~JanossyPoolingNonbonded.register_buffer
      ~JanossyPoolingNonbonded.register_forward_hook
      ~JanossyPoolingNonbonded.register_forward_pre_hook
      ~JanossyPoolingNonbonded.register_parameter
      ~JanossyPoolingNonbonded.requires_grad_
      ~JanossyPoolingNonbonded.share_memory
      ~JanossyPoolingNonbonded.state_dict
      ~JanossyPoolingNonbonded.to
      ~JanossyPoolingNonbonded.train
      ~JanossyPoolingNonbonded.type
      ~JanossyPoolingNonbonded.zero_grad

   .. rubric:: Attributes

   .. autosummary::

      ~JanossyPoolingNonbonded.T_destination
      ~JanossyPoolingNonbonded.dump_patches
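
A sketch of how these Janossy-pooling readouts are typically composed with a
graph convolution stage to form a full espaloma model, following the pattern
in the project README; the layer name, hyperparameters, and ``out_features``
keys below are illustrative and should be checked against the class
signatures rendered above:

.. code-block:: python

   import torch
   import espaloma as esp

   # stage I: molecular graph -> per-atom latent representation (DGL GNN layers)
   representation = esp.nn.Sequential(
       layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),
       config=[128, "relu", 128, "relu", 128, "relu"],
   )

   # stage II/III: latent representation -> symmetry-pooled MM parameters
   readout = esp.nn.readout.janossy.JanossyPooling(
       in_features=128,
       config=[128, "relu", 128, "relu", 128, "relu"],
       out_features={
           2: {"log_coefficients": 2},  # bond terms
           3: {"log_coefficients": 2},  # angle terms
           4: {"k": 6},                 # proper torsion barrier heights
       },
   )

   espaloma_model = torch.nn.Sequential(representation, readout)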
autosummary:: ~JanossyPoolingNonbonded.T_destination ~JanossyPoolingNonbonded.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.readout.janossy.LinearMixtureToOriginal.rst ================================================ espaloma.nn.readout.janossy.LinearMixtureToOriginal =================================================== .. currentmodule:: espaloma.nn.readout.janossy .. autoclass:: LinearMixtureToOriginal :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~LinearMixtureToOriginal.__init__ ~LinearMixtureToOriginal.add_module ~LinearMixtureToOriginal.apply ~LinearMixtureToOriginal.bfloat16 ~LinearMixtureToOriginal.buffers ~LinearMixtureToOriginal.children ~LinearMixtureToOriginal.cpu ~LinearMixtureToOriginal.cuda ~LinearMixtureToOriginal.double ~LinearMixtureToOriginal.eval ~LinearMixtureToOriginal.extra_repr ~LinearMixtureToOriginal.float ~LinearMixtureToOriginal.forward ~LinearMixtureToOriginal.half ~LinearMixtureToOriginal.load_state_dict ~LinearMixtureToOriginal.modules ~LinearMixtureToOriginal.named_buffers ~LinearMixtureToOriginal.named_children ~LinearMixtureToOriginal.named_modules ~LinearMixtureToOriginal.named_parameters ~LinearMixtureToOriginal.parameters ~LinearMixtureToOriginal.register_backward_hook ~LinearMixtureToOriginal.register_buffer ~LinearMixtureToOriginal.register_forward_hook ~LinearMixtureToOriginal.register_forward_pre_hook ~LinearMixtureToOriginal.register_parameter ~LinearMixtureToOriginal.requires_grad_ ~LinearMixtureToOriginal.share_memory ~LinearMixtureToOriginal.state_dict ~LinearMixtureToOriginal.to ~LinearMixtureToOriginal.train ~LinearMixtureToOriginal.type ~LinearMixtureToOriginal.zero_grad .. rubric:: Attributes .. autosummary:: ~LinearMixtureToOriginal.T_destination ~LinearMixtureToOriginal.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.readout.janossy.rst ================================================ espaloma.nn.readout.janossy =========================== .. automodule:: espaloma.nn.readout.janossy .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst ExpCoefficients JanossyPooling JanossyPoolingImproper JanossyPoolingNonbonded LinearMixtureToOriginal ================================================ FILE: docs/autosummary/espaloma.nn.readout.node_typing.NodeTyping.rst ================================================ espaloma.nn.readout.node\_typing.NodeTyping =========================================== .. currentmodule:: espaloma.nn.readout.node_typing .. autoclass:: NodeTyping :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~NodeTyping.__init__ ~NodeTyping.add_module ~NodeTyping.apply ~NodeTyping.bfloat16 ~NodeTyping.buffers ~NodeTyping.children ~NodeTyping.cpu ~NodeTyping.cuda ~NodeTyping.double ~NodeTyping.eval ~NodeTyping.extra_repr ~NodeTyping.float ~NodeTyping.forward ~NodeTyping.half ~NodeTyping.load_state_dict ~NodeTyping.modules ~NodeTyping.named_buffers ~NodeTyping.named_children ~NodeTyping.named_modules ~NodeTyping.named_parameters ~NodeTyping.parameters ~NodeTyping.register_backward_hook ~NodeTyping.register_buffer ~NodeTyping.register_forward_hook ~NodeTyping.register_forward_pre_hook ~NodeTyping.register_parameter ~NodeTyping.requires_grad_ ~NodeTyping.share_memory ~NodeTyping.state_dict ~NodeTyping.to ~NodeTyping.train ~NodeTyping.type ~NodeTyping.zero_grad .. rubric:: Attributes .. 
autosummary:: ~NodeTyping.T_destination ~NodeTyping.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.readout.node_typing.rst ================================================ espaloma.nn.readout.node\_typing ================================ .. automodule:: espaloma.nn.readout.node_typing .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst NodeTyping ================================================ FILE: docs/autosummary/espaloma.nn.readout.rst ================================================ espaloma.nn.readout =================== .. automodule:: espaloma.nn.readout .. rubric:: Modules .. autosummary:: :toctree: :template: custom-module-template.rst :recursive: espaloma.nn.readout.base_readout espaloma.nn.readout.charge_equilibrium espaloma.nn.readout.graph_level_readout espaloma.nn.readout.janossy espaloma.nn.readout.node_typing ================================================ FILE: docs/autosummary/espaloma.nn.rst ================================================ espaloma.nn =========== .. automodule:: espaloma.nn .. rubric:: Modules .. autosummary:: :toctree: :template: custom-module-template.rst :recursive: espaloma.nn.baselines espaloma.nn.layers espaloma.nn.readout espaloma.nn.sequential ================================================ FILE: docs/autosummary/espaloma.nn.sequential.Sequential.rst ================================================ espaloma.nn.sequential.Sequential ================================= .. currentmodule:: espaloma.nn.sequential .. autoclass:: Sequential :members: :show-inheritance: :inherited-members: .. automethod:: __init__ .. rubric:: Methods .. autosummary:: ~Sequential.__init__ ~Sequential.add_module ~Sequential.apply ~Sequential.bfloat16 ~Sequential.buffers ~Sequential.children ~Sequential.cpu ~Sequential.cuda ~Sequential.double ~Sequential.eval ~Sequential.extra_repr ~Sequential.float ~Sequential.forward ~Sequential.half ~Sequential.load_state_dict ~Sequential.modules ~Sequential.named_buffers ~Sequential.named_children ~Sequential.named_modules ~Sequential.named_parameters ~Sequential.parameters ~Sequential.register_backward_hook ~Sequential.register_buffer ~Sequential.register_forward_hook ~Sequential.register_forward_pre_hook ~Sequential.register_parameter ~Sequential.requires_grad_ ~Sequential.share_memory ~Sequential.state_dict ~Sequential.to ~Sequential.train ~Sequential.type ~Sequential.zero_grad .. rubric:: Attributes .. autosummary:: ~Sequential.T_destination ~Sequential.dump_patches ================================================ FILE: docs/autosummary/espaloma.nn.sequential.rst ================================================ espaloma.nn.sequential ====================== .. automodule:: espaloma.nn.sequential .. rubric:: Classes .. autosummary:: :toctree: :template: custom-class-template.rst Sequential ================================================ FILE: docs/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/stable/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.

# In case the project was not installed
import os
import sys
import subprocess
sys.path.insert(0, os.path.abspath('..'))

# -- Project information -----------------------------------------------------

project = 'espaloma'
copyright = ("2020, Yuanqing Wang @ choderalab // MSKCC.")
author = 'Yuanqing Wang'
github_url = "https://github.com/choderalab/espaloma"

# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = ''

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autosummary',
    'sphinx.ext.autodoc',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
    'sphinx.ext.intersphinx',
    'sphinx.ext.extlinks',
    'sphinx.ext.coverage',
    # 'numpydoc',
]

autosummary_generate = True
napoleon_google_docstring = False
napoleon_use_param = False
napoleon_use_ivar = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'default'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by the theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'espalomadoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'espaloma.tex', 'espaloma Documentation',
     'espaloma', 'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'espaloma', 'espaloma Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'espaloma', 'espaloma Documentation',
     author, 'espaloma',
     'Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm',
     'Miscellaneous'),
]

# -- Extension configuration -------------------------------------------------

================================================
FILE: docs/deploy.rst
================================================
Deploy espaloma 0.3.2 force field to parametrize your MM system
===============================================================

A pretrained espaloma force field can be deployed on arbitrary small molecule
systems in a few lines::

    # imports
    import os
    import torch
    import espaloma as esp

    # define or load a molecule of interest via the Open Force Field toolkit
    from openff.toolkit.topology import Molecule
    molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")

    # create an Espaloma Graph object to represent the molecule of interest
    molecule_graph = esp.Graph(molecule)

    # load pretrained model
    espaloma_model = esp.get_model("latest")

    # apply a trained espaloma model to assign parameters
    espaloma_model(molecule_graph.heterograph)

    # create an OpenMM System for the specified molecule
    openmm_system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph)

If you are using espaloma from a local ``.pt`` file, for example
``espaloma-0.3.2.pt``, you need to call the model's ``eval`` method to obtain
correct inference results::

    # load local pretrained model
    espaloma_model = torch.load("espaloma-0.3.2.pt")
    espaloma_model.eval()

The rest of the code should be the same as in the previous example.
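For a quick sanity check of the resulting ``System``, a single-point energy
can be evaluated with OpenMM. The snippet below is an illustrative sketch, not
part of espaloma itself; it assumes a recent OpenMM (``import openmm``) and an
openff-toolkit version whose conformers expose ``to_openmm()``::

    # generate one conformer with the OpenFF toolkit and evaluate its energy
    molecule.generate_conformers(n_conformers=1)

    import openmm
    from openmm import unit

    integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
    context = openmm.Context(openmm_system, integrator)
    # on older openff-toolkit versions the conformer is already an OpenMM
    # Quantity, in which case the to_openmm() call should be dropped
    context.setPositions(molecule.conformers[0].to_openmm())
    print(context.getState(getEnergy=True).getPotentialEnergy())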
================================================
FILE: docs/download_experiments.sh
================================================
export fileid=1qdHEypk3uMhZEYCStWTU8u1uIDHzH3Qy
wget -O typing.ipynb 'https://docs.google.com/uc?export=download&id='$fileid
jupyter nbconvert typing.ipynb --to rst --TagRemovePreprocessor.remove_all_outputs_tags='{"remove_output"}'
mv typing.rst experiments/typing.rst

export fileid=1krhwGHKoqL5-_P0G89fDB7Iw3ENHW2G_
wget -O mm_fitting_small.ipynb 'https://docs.google.com/uc?export=download&id='$fileid
jupyter nbconvert mm_fitting_small.ipynb --to rst --TagRemovePreprocessor.remove_all_outputs_tags='{"remove_output"}'
mv mm_fitting_small.rst experiments/mm_fitting_small.rst
mv mm_fitting_small_files experiments/mm_fitting_small_files

export fileid=1i_z0b0-m_91bMww1hY5Kdc76VHmtHsWD
wget -O qm_fitting.ipynb 'https://docs.google.com/uc?export=download&id='$fileid
jupyter nbconvert qm_fitting.ipynb --to rst --TagRemovePreprocessor.remove_all_outputs_tags='{"remove_output"}'
cp qm_fitting.rst experiments/qm_fitting.rst

rm *.ipynb

================================================
FILE: docs/experiments/index.rst
================================================
To reproduce the experiments in the paper https://arxiv.org/abs/2010.01196

.. toctree::
   :maxdepth: 2
   :caption: Contents:

   typing
   mm_fitting_small
   qm_fitting

================================================
FILE: docs/experiments/mm_fitting_small.rst
================================================
Toy experiment: Molecular mechanics (MM) fitting on subsampled PhAlkEthOH dataset.
==================================================================================

**Open in Google Colab**: http://data.wangyq.net/esp_notebooks/phalkethoh_mm_small.ipynb

This notebook is intended to recover the MM fitting behavior in
https://arxiv.org/abs/2010.01196

To assess how well Espaloma can learn to reproduce an MM force field from a
limited amount of data, we selected a chemical dataset of limited
complexity—PhAlkEthOH—which consists of linear and cyclic molecules
containing phenyl rings, small alkanes, ethers, and alcohols composed of only
the elements carbon, oxygen, and hydrogen. We generated a set of
conformational snapshots for each molecule using short high-temperature
molecular dynamics simulations at 300 K initiated from multiple conformations
to ensure adequate sampling of conformers. The PhAlkEthOH dataset was randomly
partitioned (by molecules) into 80% training, 10% validation, and 10% test
molecules, with 100 snapshots/molecule, and an Espaloma model was trained with
early stopping, monitoring for a decrease in accuracy on the validation set.

.. image:: https://pbs.twimg.com/media/FBL0qACXIBkJLQZ?format=png&name=4096x4096

Installation and imports
------------------------

.. code:: python

    # install conda
    ! pip install -q condacolab
    import condacolab
    condacolab.install()

.. parsed-literal::

    ⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...
    📦 Installing...
    📌 Adjusting configuration...
    🩹 Patching environment...
    ⏲ Done in 0:00:34
    🔁 Restarting kernel...

.. code:: python

    %%capture
    ! mamba install --yes --strict-channel-priority --channel jaimergp/label/unsupported-cudatoolkit-shim --channel omnia --channel omnia/label/cuda100 --channel dglteam --channel numpy openmm openmmtools openmmforcefields rdkit openff-toolkit dgl-cuda10.0 qcportal

.. code:: python

    ! git clone https://github.com/choderalab/espaloma.git

.. parsed-literal::

    Cloning into 'espaloma'...
    remote: Enumerating objects: 7812, done.
    remote: Counting objects: 100% (3634/3634), done.
    remote: Compressing objects: 100% (1649/1649), done.
    remote: Total 7812 (delta 2714), reused 2639 (delta 1900), pack-reused 4178
    Receiving objects: 100% (7812/7812), 13.50 MiB | 11.77 MiB/s, done.
    Resolving deltas: 100% (5538/5538), done.

.. code:: python

    import torch
    import sys
    sys.path.append("/content/espaloma")
    import espaloma as esp

.. parsed-literal::

    Warning: Unable to load toolkit 'OpenEye Toolkit'. The Open Force Field Toolkit does not require the OpenEye Toolkits, and can use RDKit/AmberTools instead. However, if you have a valid license for the OpenEye Toolkits, consider installing them for faster performance and additional file format support: https://docs.eyesopen.com/toolkits/python/quickstart-python/linuxosx.html OpenEye offers free Toolkit licenses for academics: https://www.eyesopen.com/academic-licensing

Load dataset
------------

Here we load the PhAlkEthOH dataset and shuffle it before splitting into
training, validation, and test sets (80%:10%:10%).

.. code:: python

    %%capture
    ! wget http://data.wangyq.net/esp_dataset/phalkethoh_mm_small.zip
    ! unzip phalkethoh_mm_small.zip

.. code:: python

    ds = esp.data.dataset.GraphDataset.load("phalkethoh")
    ds.shuffle(seed=2666)
    ds_tr, ds_vl, ds_te = ds.split([8, 1, 1])

.. parsed-literal::

    DGL backend not selected or invalid. Assuming PyTorch for now.
    Using backend: pytorch

.. parsed-literal::

    Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable. Valid options are: pytorch, mxnet, tensorflow (all lowercase)

A training dataloader is constructed with ``batch_size=100``.

.. code:: python

    ds_tr_loader = ds_tr.view(batch_size=100, shuffle=True)

.. code:: python

    g_tr = next(iter(ds_tr.view(batch_size=len(ds_tr))))
    g_vl = next(iter(ds_vl.view(batch_size=len(ds_vl))))

.. parsed-literal::

    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: From v0.5, DGLHeteroGraph is merged into DGLGraph. You can safely replace dgl.batch_hetero with dgl.batch return warnings.warn(message, category=category, stacklevel=1)

Define model
------------

Define Espaloma stage I: graph -> atom latent representation

.. code:: python

    representation = esp.nn.Sequential(
        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),  # use SAGEConv implementation in DGL
        config=[128, "relu", 128, "relu", 128, "relu"],  # 3 layers, 128 units, ReLU activation
    )

Define Espaloma stages II and III: atom latent representation -> bond, angle,
and torsion representation and parameters. Then compose all three Espaloma
stages into an end-to-end model.

.. code:: python

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=128,
        config=[128, "relu", 128, "relu", 128, "relu"],
        out_features={  # define modular MM parameters Espaloma will assign
            1: {"e": 1, "s": 1},  # atom hardness and electronegativity
            2: {"log_coefficients": 2},  # bond linear combination, enforce positive
            3: {"log_coefficients": 2},  # angle linear combination, enforce positive
            4: {"k": 6},  # torsion barrier heights (can be positive or negative)
        },
    )

    espaloma_model = torch.nn.Sequential(
        representation,
        readout,
        esp.nn.readout.janossy.ExpCoefficients(),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
        esp.mm.energy.EnergyInGraph(suffix="_ref"),
        esp.nn.readout.charge_equilibrium.ChargeEquilibrium(),
    )
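As an illustrative sanity check (not part of the original notebook), a single
forward pass through the untrained model should populate graph-level energies
under ``g.nodes['g'].data['u']``, the same entry the loss below compares
against ``u_ref``:

.. code:: python

    # illustrative check: one forward pass populates per-snapshot energies
    g = next(iter(ds_tr_loader))
    g = espaloma_model(g)
    print(g.nodes['g'].data['u'].shape)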
.. code:: python

    if torch.cuda.is_available():
        espaloma_model = espaloma_model.cuda()

The loss function is specified as the MSE between predicted and reference
energies.

.. code:: python

    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(),  # use mean-squared error loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

Define optimizer
----------------

.. code:: python

    optimizer = torch.optim.Adam(espaloma_model.parameters(), 1e-4)

Train it!
---------

.. code:: python

    for idx_epoch in range(10000):
        for g in ds_tr_loader:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                g = g.to("cuda:0")
            g = espaloma_model(g)
            loss = loss_fn(g)
            loss.backward()
            optimizer.step()
        torch.save(espaloma_model.state_dict(), "%s.th" % idx_epoch)

.. parsed-literal::

    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: From v0.5, DGLHeteroGraph is merged into DGLGraph. You can safely replace dgl.batch_hetero with dgl.batch return warnings.warn(message, category=category, stacklevel=1)
    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: dgl.to_homo is deprecated. Please use dgl.to_homogeneous return warnings.warn(message, category=category, stacklevel=1)

Inspect
-------

.. code:: python

    inspect_metric = esp.metrics.GraphMetric(
        base_metric=torch.nn.L1Loss(),  # use L1 (mean absolute error) loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

.. code:: python

    if torch.cuda.is_available():
        g_vl = g_vl.to("cuda:0")
        g_tr = g_tr.to("cuda:0")

.. code:: python

    loss_tr = []
    loss_vl = []

.. code:: python

    for idx_epoch in range(10000):
        espaloma_model.load_state_dict(
            torch.load("%s.th" % idx_epoch)
        )
        espaloma_model(g_tr)
        loss_tr.append(inspect_metric(g_tr).item())
        espaloma_model(g_vl)
        loss_vl.append(inspect_metric(g_vl).item())

.. parsed-literal::

    /usr/local/lib/python3.7/site-packages/dgl/base.py:45: DGLWarning: dgl.to_homo is deprecated. Please use dgl.to_homogeneous return warnings.warn(message, category=category, stacklevel=1)

.. code:: python

    import numpy as np
    loss_tr = np.array(loss_tr) * 627.5  # 627.5: hartree -> kcal/mol
    loss_vl = np.array(loss_vl) * 627.5

.. code:: python

    from matplotlib import pyplot as plt
    plt.plot(loss_tr, label="train")
    plt.plot(loss_vl, label="valid")
    plt.yscale("log")
    plt.legend()

.. image:: mm_fitting_small_files/mm_fitting_small_31_1.png

================================================
FILE: docs/experiments/qm_fitting.rst
================================================
Quantum mechanics (QM) fitting experiment.
==========================================

**Open in Google Colab:** http://data.wangyq.net/esp_notesbooks/qm_fitting.ipynb

This notebook recovers the QM fitting experiment in
https://arxiv.org/abs/2010.01196

|image1|

**Table 2:** Espaloma can directly fit quantum chemical energies to produce a
new molecular mechanics force field with better accuracy than traditional
force fields based on atom typing or direct chemical perception. Espaloma was
fit to quantum chemical potential energies for conformations generated by
optimization trajectories from multiple conformers in various datasets from
QCArchive. All datasets were partitioned by molecules 80:10:10 into
train:validate:test sets. We report the RMSE on training and test sets, as
well as the performance of legacy force fields on the test set.
All statistics are computed with predicted and reference energies centered to
have zero mean for each molecule to focus on errors in relative conformational
energetics, rather than on errors in predicting the heats of formation of
chemical species (which the MM functional form used here is incapable of). The
95% confidence intervals annotated are calculated via bootstrapping molecules
with replacement using 1000 replicates. \*: Six cyclic peptides that cannot be
parametrized using the OpenForceField toolkit engine
:raw-latex:`\cite{openff-toolkit-0.10.0}` are not included.

Since Espaloma can derive a force field solely by fitting to energies (and
optionally gradients), we repeat the end-to-end fitting experiment (see the
notebook http://data.wangyq.net/esp_notebooks/phalkethoh_mm_small.ipynb)
directly using quantum chemical (QM) datasets used to build and evaluate MM
force fields. We assessed the ability of Espaloma to learn several distinct
quantum chemical datasets generated by the Open Force Field Initiative and
deposited in the MolSSI QCArchive:

- **PhAlkEthOH** is a collection of compounds containing only the elements
  carbon, hydrogen, and oxygen in compounds containing phenyl rings, alkanes,
  ketones, and alcohols. Limited in elemental and chemical diversity, this
  dataset is chosen as a proof-of-concept to demonstrate the capability of
  Espaloma to fit and generalize quantum chemical energies when training data
  is sufficient to exhaustively cover the breadth of chemical environments.
- **OpenFF Gen2 Optimization** consists of druglike molecules used in the
  parametrization of the Open Force Field 1.2.0 (“Parsley”) small molecule
  force field. This set was constructed by the Open Force Field Consortium
  from challenging molecule structures provided by Pfizer, Bayer, and Roche,
  along with diverse molecules selected from eMolecules to achieve useful
  coverage of chemical space.
- **VEHICLe**, or *virtual exploratory heterocyclic library*, is a set of
  heteroaromatic ring systems of interest to drug discovery. The atoms in the
  molecules in this dataset have interesting chemical environments in
  heteroaromatic rings that present a challenge to traditional atom typing
  schemes, which cannot easily accommodate the nuanced distinctions in
  chemical environments that lead to perturbations in heterocycle structure.
  We use this dataset to illustrate that Espaloma performs in situations
  challenging to traditional force fields.
- **PepConf** contains a variety of short peptides, including capped, cyclic,
  and disulfide-bonded peptides. This dataset—regenerated using the Open
  Force Field QCSubmit tool—explores the applicability of Espaloma to
  biopolymers, such as proteins.

Since nonbonded terms are generally optimized to fit other condensed-phase
properties, we focused here on optimizing only the valence parameters (bond,
angle, and proper and improper torsion) to fit these gas-phase quantum
chemical datasets, fixing the non-bonded energies using a legacy force field.
Because we are learning an MM force field that is incapable of reproducing
quantum chemical heats of formation reflected as an additive offset in the
quantum chemical energy targets, in both training and test sets, snapshot
energies for each molecule are shifted to have zero mean. All datasets are
randomly shuffled and split (by molecules) into training (80%), validation
(10%), and test (10%) sets.

.. |image1| image:: https://pbs.twimg.com/media/FBL1Gb0WEAYkUhM?format=png&name=4096x4096

Installation and imports
------------------------
.. code:: python

    # install conda
    ! pip install -q condacolab
    import condacolab
    condacolab.install()

.. code:: python

    %%capture
    ! mamba install --yes --strict-channel-priority --channel jaimergp/label/unsupported-cudatoolkit-shim --channel omnia --channel omnia/label/cuda100 --channel dglteam --channel numpy openmm openmmtools openmmforcefields rdkit openff-toolkit dgl-cuda10.0 qcportal

.. code:: python

    ! git clone https://github.com/choderalab/espaloma.git

.. code:: python

    import torch
    import sys
    sys.path.append("/content/espaloma")
    import espaloma as esp

Load dataset
------------

Choose a dataset from ``["gen2", "pepconf", "vehicle", "phalkethoh"]``.

.. code:: python

    dataset_name = "gen2"
    # dataset_name = "pepconf"
    # dataset_name = "vehicle"
    # dataset_name = "phalkethoh"

.. code:: python

    %%capture
    ! wget "data.wangyq.net/esp_dataset/"$dataset_name".zip"
    ! unzip $dataset_name".zip"

.. code:: python

    ds = esp.data.dataset.GraphDataset.load(dataset_name)
    ds.shuffle(seed=2666)
    ds_tr, ds_vl, ds_te = ds.split([8, 1, 1])

Define model
------------

Define Espaloma stage I: graph -> atom latent representation

.. code:: python

    representation = esp.nn.Sequential(
        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),  # use SAGEConv implementation in DGL
        config=[128, "relu", 128, "relu", 128, "relu"],  # 3 layers, 128 units, ReLU activation
    )

Define Espaloma stages II and III: atom latent representation -> bond, angle,
and torsion representation and parameters. Then compose all three Espaloma
stages into an end-to-end model.

.. code:: python

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=128,
        config=[128, "relu", 128, "relu", 128, "relu"],
        out_features={  # define modular MM parameters Espaloma will assign
            1: {"e": 1, "s": 1},  # atom hardness and electronegativity
            2: {"log_coefficients": 2},  # bond linear combination, enforce positive
            3: {"log_coefficients": 2},  # angle linear combination, enforce positive
            4: {"k": 6},  # torsion barrier heights (can be positive or negative)
        },
    )

    espaloma_model = torch.nn.Sequential(
        representation,
        readout,
        esp.nn.readout.janossy.ExpCoefficients(),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
    )

.. code:: python

    if torch.cuda.is_available():
        espaloma_model = espaloma_model.cuda()

The loss function is specified as the MSE between predicted and reference
energies.

.. code:: python

    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(),  # use mean-squared error loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

Define optimizer
----------------

.. code:: python

    optimizer = torch.optim.Adam(espaloma_model.parameters(), 1e-4)

Train it!
---------

.. code:: python

    for idx_epoch in range(10000):
        for g in ds_tr:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                g.heterograph = g.heterograph.to("cuda:0")
            g = espaloma_model(g.heterograph)
            loss = loss_fn(g)
            loss.backward()
            optimizer.step()
        torch.save(espaloma_model.state_dict(), "%s.th" % idx_epoch)

Inspect
-------

.. code:: python

    inspect_metric = esp.metrics.center(torch.nn.L1Loss())  # L1 (mean absolute error) loss, centered per molecule

.. code:: python

    loss_tr = []
    loss_vl = []
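The ``center`` wrapper removes each input's mean before applying the base
metric, matching the zero-mean energy shifting described above. As a
plain-tensor illustration of the idea (this sketch is not espaloma's
implementation):

.. code:: python

    # illustration only: centering makes the metric insensitive to
    # per-molecule constant energy offsets
    import torch

    def centered_l1(u, u_ref):
        return torch.nn.functional.l1_loss(u - u.mean(), u_ref - u_ref.mean())

    u = torch.tensor([1.0, 2.0, 3.0])
    print(centered_l1(u, u + 100.0))  # tensor(0.): the constant offset drops out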
.. code:: python

    with torch.no_grad():
        for idx_epoch in range(10000):
            espaloma_model.load_state_dict(
                torch.load("%s.th" % idx_epoch)
            )

            # training set performance
            u = []
            u_ref = []
            for g in ds_tr:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_tr.append(inspect_metric(u, u_ref))

            # validation set performance
            u = []
            u_ref = []
            for g in ds_vl:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_vl.append(inspect_metric(u, u_ref))

.. code:: python

    import numpy as np
    loss_tr = np.array(loss_tr) * 627.5  # 627.5: hartree -> kcal/mol
    loss_vl = np.array(loss_vl) * 627.5

.. code:: python

    from matplotlib import pyplot as plt
    plt.plot(loss_tr, label="train")
    plt.plot(loss_vl, label="valid")
    plt.yscale("log")
    plt.legend()

================================================
FILE: docs/experiments/typing.rst
================================================
Atom typing recovery experiment.
================================

**Open in Google Colab**: http://data.wangyq.net/esp_notebooks/typing.ipynb
(GPU preferred)

In this notebook, we reproduce the atom typing recovery experiment in `Wang Y,
Fass J, and Chodera JD "End-to-End Differentiable Construction of Molecular
Mechanics Force Fields" <https://arxiv.org/abs/2010.01196>`__ (Section 3:
Graph neural networks can learn to reproduce human-defined legacy atom types
with high accuracy; Figure 3. Graph neural networks can reproduce legacy atom
types with high accuracy.)

.. image:: https://pbs.twimg.com/media/FBLz_6sWUAM2iHa?format=jpg&name=4096x4096

Graph neural networks can reproduce legacy atom types with high accuracy. The
Stage 1 graph neural network of Espaloma chained to a discrete atom type
readout was fit to GAFF 1.81 atom types on a subset of ZINC distributed with
parm@Frosst as a validation set. The 7529 molecules in this set were
partitioned 80:10:10 into training:test:validation sets for this experiment.
The overall test set accuracy was :math:`99.07\%_{98.93\%}^{99.22\%}`, with
1000 bootstrap replicates used to estimate the confidence intervals arising
from finite test set size effects. (a) The distribution of the number of atom
type discrepancies on the test set demonstrates that only a minority of atoms
are incorrectly typed. (b) The error rate per element is primarily
concentrated within carbon, nitrogen, and sulfur types. (c) Examining atom
type failures in detail on molecules with the largest numbers of discrepancies
shows that the atom types are easily confused by a human, since they represent
qualities that are difficult to precisely define. (d) The distribution of
predicted atom types for each reference atom type for carbon types are shown;
on-diagonal values indicate agreement. The percentages annotated under the
x-axis denote the relative abundance within the test set.

Installation and Imports
------------------------

First, we install espaloma and all of its dependencies. Note that this
process will be significantly simplified in the future.

.. code:: python

    %%capture
    ! wget -c https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
    ! bash Miniconda3-latest-Linux-x86_64.sh -b -f -p /usr/local
    ! conda config --add channels conda-forge --add channels omnia --add channels omnia/label/cuda100 --add channels dglteam
    ! conda update --yes --all
    ! conda create --yes -n openmm python=3.6 numpy openmm openmmtools rdkit openforcefield==0.7.0 dgl-cuda10.0 qcportal
    ! git clone https://github.com/choderalab/espaloma.git

.. code:: python

    import torch
    import dgl
    import numpy as np
    import sys
    sys.path.append("/content/espaloma")  # make the cloned espaloma importable
    import espaloma as esp

Get dataset
-----------

.. code:: python

    import os
    if not os.path.exists("zinc"):
        os.system("wget data.wangyq.net/esp_datasets/zinc")
    ds = esp.data.dataset.GraphDataset.load("zinc")

Assign legacy typing
--------------------

Next, we assign legacy typings using the GAFF-1.81 force field.

.. code:: python

    typing = esp.graphs.legacy_force_field.LegacyForceField('gaff-1.81')
    ds.apply(typing, in_place=True)  # this modifies the original data

Data massaging
--------------

We then split the data into training, test, and validation sets (80:10:10)
and batch the datasets.

.. code:: python

    ds_tr, ds_te, ds_vl = ds.split([8, 1, 1])

.. code:: python

    ds_tr = ds_tr.view('graph', batch_size=100, shuffle=True)
    ds_te = ds_te.view('graph', batch_size=100)
    ds_vl = ds_vl.view('graph', batch_size=100)

Defining model
--------------

We define a graph neural network (GNN) model with SAGEConv layers: 128 units,
three layers, and ReLU activation functions.

.. code:: python

    # define a layer
    layer = esp.nn.layers.dgl_legacy.gn("SAGEConv")

    # define a representation
    representation = esp.nn.Sequential(
        layer,
        [128, "relu", 128, "relu", 128, "relu"],
    )

    # define a readout
    readout = esp.nn.readout.node_typing.NodeTyping(
        in_features=128, n_classes=100
    )

    net = torch.nn.Sequential(
        representation,
        readout
    )

Define graph-level loss function
--------------------------------

.. code:: python

    loss_fn = esp.metrics.TypingAccuracy()

Train the model
---------------

.. code:: python

    # define optimizer
    optimizer = torch.optim.Adam(net.parameters(), 1e-5)

    # train the model
    for _ in range(3000):
        for g in ds_tr:
            optimizer.zero_grad()
            net(g.heterograph)
            loss = loss_fn(g.heterograph)
            loss.backward()
            optimizer.step()

================================================
FILE: docs/index.rst
================================================
.. espaloma documentation master file, created by
   sphinx-quickstart on Thu Mar 15 13:55:56 2018.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

espaloma: Extensible Surrogate Potential Optimized by Message-passing Algorithms
================================================================================

Source code for Wang Y, Fass J, and Chodera JD "End-to-End Differentiable
Construction of Molecular Mechanics Force Fields."
https://arxiv.org/abs/2010.01196

.. image:: _static/espaloma_abstract_v2-2.png

Paper Abstract
--------------

Molecular mechanics (MM) potentials have long been a workhorse of
computational chemistry. Leveraging accuracy and speed, these functional forms
find use in a wide variety of applications in biomolecular modeling and drug
discovery, from rapid virtual screening to detailed free energy calculations.
Traditionally, MM potentials have relied on human-curated, inflexible, and
poorly extensible discrete chemical perception rules (*atom types*) for
applying parameters to small molecules or biopolymers, making it difficult to
optimize both types and parameters to fit quantum chemical or physical
property data. Here, we propose an alternative approach that uses *graph
neural networks* to perceive chemical environments, producing continuous atom
embeddings from which valence and nonbonded parameters can be predicted using
invariance-preserving layers.
Since all stages are built from smooth neural functions, the entire
process---spanning chemical perception to parameter assignment---is modular
and end-to-end differentiable with respect to model parameters, allowing new
force fields to be easily constructed, extended, and applied to arbitrary
molecules. We show that this approach is not only sufficiently expressive to
reproduce legacy atom types, but that it can learn and extend existing
molecular mechanics force fields, construct entirely new force fields
applicable to both biopolymers and small molecules from quantum chemical
calculations, and even learn to accurately predict free energies from
experimental observables.

Lab Meeting
-----------

Full video: https://youtu.be/OC210nUuXHk

.. toctree::
   :maxdepth: 2
   :caption: Contents:

   install
   deploy
   experiments/index
   api

Indices and tables
------------------

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

================================================
FILE: docs/install.rst
================================================
Installation
============

mamba
-----

We recommend using ``mamba``, which is a drop-in replacement for ``conda``
and is much faster.

.. code-block:: bash

   $ mamba create --name espaloma -c conda-forge "espaloma=0.3.2"

================================================
FILE: docs/make.bat
================================================
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=malt

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd

================================================
FILE: docs/qm_fitting.rst
================================================
Quantum mechanics (QM) fitting experiment.
==========================================

**Open in Google Colab:** http://data.wangyq.net/esp_notesbooks/qm_fitting.ipynb

This notebook recovers the QM fitting experiment in
https://arxiv.org/abs/2010.01196

|image1|

**Table 2:** Espaloma can directly fit quantum chemical energies to produce a
new molecular mechanics force field with better accuracy than traditional
force fields based on atom typing or direct chemical perception. Espaloma was
fit to quantum chemical potential energies for conformations generated by
optimization trajectories from multiple conformers in various datasets from
QCArchive. All datasets were partitioned by molecules 80:10:10 into
train:validate:test sets. We report the RMSE on training and test sets, as
well as the performance of legacy force fields on the test set. All statistics
are computed with predicted and reference energies centered to have zero mean
for each molecule to focus on errors in relative conformational energetics,
rather than on errors in predicting the heats of formation of chemical species
(which the MM functional form used here is incapable of). The 95% confidence
intervals annotated are calculated via bootstrapping molecules with
replacement using 1000 replicates.
\*: Six cyclic peptides that cannot be parametrized using the OpenForceField
toolkit engine :raw-latex:`\cite{openff-toolkit-0.10.0}` are not included.

Since Espaloma can derive a force field solely by fitting to energies (and
optionally gradients), we repeat the end-to-end fitting experiment (see the
notebook http://data.wangyq.net/esp_notebooks/phalkethoh_mm_small.ipynb)
directly using quantum chemical (QM) datasets used to build and evaluate MM
force fields. We assessed the ability of Espaloma to learn several distinct
quantum chemical datasets generated by the Open Force Field Initiative and
deposited in the MolSSI QCArchive:

- **PhAlkEthOH** is a collection of compounds containing only the elements
  carbon, hydrogen, and oxygen in compounds containing phenyl rings, alkanes,
  ketones, and alcohols. Limited in elemental and chemical diversity, this
  dataset is chosen as a proof-of-concept to demonstrate the capability of
  Espaloma to fit and generalize quantum chemical energies when training data
  is sufficient to exhaustively cover the breadth of chemical environments.
- **OpenFF Gen2 Optimization** consists of druglike molecules used in the
  parametrization of the Open Force Field 1.2.0 (“Parsley”) small molecule
  force field. This set was constructed by the Open Force Field Consortium
  from challenging molecule structures provided by Pfizer, Bayer, and Roche,
  along with diverse molecules selected from eMolecules to achieve useful
  coverage of chemical space.
- **VEHICLe**, or *virtual exploratory heterocyclic library*, is a set of
  heteroaromatic ring systems of interest to drug discovery. The atoms in the
  molecules in this dataset have interesting chemical environments in
  heteroaromatic rings that present a challenge to traditional atom typing
  schemes, which cannot easily accommodate the nuanced distinctions in
  chemical environments that lead to perturbations in heterocycle structure.
  We use this dataset to illustrate that Espaloma performs in situations
  challenging to traditional force fields.
- **PepConf** contains a variety of short peptides, including capped, cyclic,
  and disulfide-bonded peptides. This dataset—regenerated using the Open
  Force Field QCSubmit tool—explores the applicability of Espaloma to
  biopolymers, such as proteins.

Since nonbonded terms are generally optimized to fit other condensed-phase
properties, we focused here on optimizing only the valence parameters (bond,
angle, and proper and improper torsion) to fit these gas-phase quantum
chemical datasets, fixing the non-bonded energies using a legacy force field.
Because we are learning an MM force field that is incapable of reproducing
quantum chemical heats of formation reflected as an additive offset in the
quantum chemical energy targets, in both training and test sets, snapshot
energies for each molecule are shifted to have zero mean. All datasets are
randomly shuffled and split (by molecules) into training (80%), validation
(10%), and test (10%) sets.

.. |image1| image:: https://pbs.twimg.com/media/FBL1Gb0WEAYkUhM?format=png&name=4096x4096

Installation and imports
------------------------

.. code:: python

    # install conda
    ! pip install -q condacolab
    import condacolab
    condacolab.install()

.. code:: python

    %%capture
    ! mamba install --yes --strict-channel-priority --channel jaimergp/label/unsupported-cudatoolkit-shim --channel omnia --channel omnia/label/cuda100 --channel dglteam --channel numpy openmm openmmtools openmmforcefields rdkit openff-toolkit dgl-cuda10.0 qcportal

.. code:: python
    ! git clone https://github.com/choderalab/espaloma.git

.. code:: python

    import torch
    import sys
    sys.path.append("/content/espaloma")
    import espaloma as esp

Load dataset
------------

Choose a dataset from ``["gen2", "pepconf", "vehicle", "phalkethoh"]``.

.. code:: python

    dataset_name = "gen2"
    # dataset_name = "pepconf"
    # dataset_name = "vehicle"
    # dataset_name = "phalkethoh"

.. code:: python

    %%capture
    ! wget "data.wangyq.net/esp_dataset/"$dataset_name".zip"
    ! unzip $dataset_name".zip"

.. code:: python

    ds = esp.data.dataset.GraphDataset.load(dataset_name)
    ds.shuffle(seed=2666)
    ds_tr, ds_vl, ds_te = ds.split([8, 1, 1])

Define model
------------

Define Espaloma stage I: graph -> atom latent representation

.. code:: python

    representation = esp.nn.Sequential(
        layer=esp.nn.layers.dgl_legacy.gn("SAGEConv"),  # use SAGEConv implementation in DGL
        config=[128, "relu", 128, "relu", 128, "relu"],  # 3 layers, 128 units, ReLU activation
    )

Define Espaloma stages II and III: atom latent representation -> bond, angle,
and torsion representation and parameters. Then compose all three Espaloma
stages into an end-to-end model.

.. code:: python

    readout = esp.nn.readout.janossy.JanossyPooling(
        in_features=128,
        config=[128, "relu", 128, "relu", 128, "relu"],
        out_features={  # define modular MM parameters Espaloma will assign
            1: {"e": 1, "s": 1},  # atom hardness and electronegativity
            2: {"log_coefficients": 2},  # bond linear combination, enforce positive
            3: {"log_coefficients": 2},  # angle linear combination, enforce positive
            4: {"k": 6},  # torsion barrier heights (can be positive or negative)
        },
    )

    espaloma_model = torch.nn.Sequential(
        representation,
        readout,
        esp.nn.readout.janossy.ExpCoefficients(),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(),
    )

.. code:: python

    if torch.cuda.is_available():
        espaloma_model = espaloma_model.cuda()

The loss function is specified as the MSE between predicted and reference
energies.

.. code:: python

    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(),  # use mean-squared error loss
        between=['u', "u_ref"],  # between predicted and QM energies
        level="g",  # compare on graph level
    )

Define optimizer
----------------

.. code:: python

    optimizer = torch.optim.Adam(espaloma_model.parameters(), 1e-4)

Train it!
---------

.. code:: python

    for idx_epoch in range(10000):
        for g in ds_tr:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                g.heterograph = g.heterograph.to("cuda:0")
            g = espaloma_model(g.heterograph)
            loss = loss_fn(g)
            loss.backward()
            optimizer.step()
        torch.save(espaloma_model.state_dict(), "%s.th" % idx_epoch)

Inspect
-------

.. code:: python

    inspect_metric = esp.metrics.center(torch.nn.L1Loss())  # L1 (mean absolute error) loss, centered per molecule

.. code:: python

    loss_tr = []
    loss_vl = []

.. code:: python

    with torch.no_grad():
        for idx_epoch in range(10000):
            espaloma_model.load_state_dict(
                torch.load("%s.th" % idx_epoch)
            )

            # training set performance
            u = []
            u_ref = []
            for g in ds_tr:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_tr.append(inspect_metric(u, u_ref))

            # validation set performance
            u = []
            u_ref = []
            for g in ds_vl:
                if torch.cuda.is_available():
                    g.heterograph = g.heterograph.to("cuda:0")
                espaloma_model(g.heterograph)
                u.append(g.nodes['g'].data['u'])
                u_ref.append(g.nodes['g'].data['u_ref'])
            u = torch.cat(u, dim=0)
            u_ref = torch.cat(u_ref, dim=0)
            loss_vl.append(inspect_metric(u, u_ref))

..
code:: python import numpy as np loss_tr = np.array(loss_tr) * 627.5 loss_vl = np.array(loss_vl) * 627.5 .. code:: python from matplotlib import pyplot as plt plt.plot(loss_tr, label="train") plt.plot(loss_vl, label="valid") plt.yscale("log") plt.legend() ================================================ FILE: espaloma/.py ================================================ ================================================ FILE: espaloma/__init__.py ================================================ """ espaloma Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm """ from . import metrics, units, data, app, graphs, mm, nn from .app.experiment import * from .graphs.graph import Graph from .metrics import GraphMetric from .mm.geometry import * from .utils.model_fetch import get_model, get_model_path # Add imports here # import espaloma # Handle versioneer from ._version import get_versions # # from openff.toolkit.utils.toolkits import ToolkitRegistry, OpenEyeToolkitWrapper, RDKitToolkitWrapper, AmberToolsToolkitWrapper # toolkit_registry = ToolkitRegistry() # toolkit_precedence = [ RDKitToolkitWrapper ] # , OpenEyeToolkitWrapper, AmberToolsToolkitWrapper] # [ toolkit_registry.register_toolkit(toolkit) for toolkit in toolkit_precedence if toolkit.is_available() ] # versions = get_versions() __version__ = versions["version"] __git_revision__ = versions["full-revisionid"] del get_versions, versions from . import _version __version__ = _version.get_versions()['version'] ================================================ FILE: espaloma/_version.py ================================================ # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. # Generated by versioneer-0.29 # https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" import errno import os import re import subprocess import sys from typing import Any, Callable, Dict, List, Optional, Tuple import functools def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "$Format:%d$" git_full = "$Format:%H$" git_date = "$Format:%ci$" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str parentdir_prefix: str versionfile_source: str verbose: bool def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "None" cfg.versionfile_source = "espaloma/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY: Dict[str, str] = {} HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command( commands: List[str], args: List[str], cwd: Optional[str] = None, verbose: bool = False, hide_stderr: bool = False, env: Optional[Dict[str, str]] = None, ) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW popen_kwargs["startupinfo"] = startupinfo for command in commands: try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break except OSError as e: if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, process.returncode return stdout, process.returncode def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. 
git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). 
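(Note, added for clarity: as implemented below the function returns None, not -1, when no post-release segment is present; e.g. "1.2.3.post4" splits into ("1.2.3", 4), while "1.2.3" yields ("1.2.3", None).)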
""" vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ================================================ FILE: espaloma/app/__init__.py ================================================ from . import experiment, report ================================================ FILE: espaloma/app/experiment.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import copy import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class Experiment(abc.ABC): """Base class for espaloma experiment.""" def __init__(self): super(Experiment, self).__init__() class Train(Experiment): """Training experiment. 
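A minimal usage sketch (illustrative, not part of the original docstring; assumes a `net` and a `GraphDataset` `ds` built as in `espaloma.app.train`, with graphs already carrying the reference labels required by the default metrics): >>> exp = Train(net=net, data=ds.view("graph", batch_size=8), n_epochs=10) >>> trained_net = exp.train() # weights land in `exp.states` every `record_interval` epochs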
Parameters ---------- net : `torch.nn.Module` Neural networks that inputs graph representation and outputs parameterized or typed graph for molecular mechanics. data : `espaloma.data.dataset.Dataset` or `torch.utils.data.DataLoader` Dataset. metrics : `List` of `callable` List of loss functions to be used (summed) in training. optimizer : `torch.optim.Optimizer` Optimizer used for training. n_epochs : `int` Number of epochs. record_interval : `int` Interval at which states are recorded. Methods ------- train_once : Train the network for exactly once. train : Execute `train_once` for `n_epochs` times and record states every `record_interval`. """ def __init__( self, net, data, metrics=[esp.metrics.TypingCrossEntropy()], optimizer=lambda net: torch.optim.Adam(net.parameters(), 1e-3), n_epochs=100, record_interval=1, normalize=esp.data.normalize.ESOL100LogNormalNormalize, scheduler=None, device=torch.device("cpu"), ): super(Train, self).__init__() # bookkeeping self.device = device if isinstance(net, torch.nn.DataParallel): self.net = net elif isinstance(net, torch.nn.parallel.DistributedDataParallel): self.net = net else: self.net = net.to(self.device) self.data = data self.metrics = metrics self.n_epochs = n_epochs self.record_interval = record_interval self.normalize = normalize() self.states = {} self.scheduler = scheduler # make optimizer if callable(optimizer): self.optimizer = optimizer(net) else: self.optimizer = optimizer # compose loss function def loss(g): _loss = 0.0 for metric in self.metrics: _loss += metric(g) return _loss self.loss = loss def train_once(self): """Train the model for one batch.""" for idx, g in enumerate( self.data ): # TODO: does this have to be a single g? if isinstance(self.optimizer, torch.optim.LBFGS): retain_graph = True else: retain_graph = False g = g.to(self.device) self.net.train() def closure(g=g): self.optimizer.zero_grad() g = self.net(g) g = self.normalize.unnorm(g) loss = self.loss(g) loss.backward(retain_graph=retain_graph) if idx == 0: if torch.isnan(loss).cpu().numpy().item() is True: raise RuntimeError("Loss is Nan.") return loss loss = closure() self.optimizer.step() if self.scheduler is not None: self.scheduler.step(loss) def train(self): """Train the model for multiple steps and record the weights once every `record_interval` """ for epoch_idx in range(int(self.n_epochs)): self.train_once() # record when `record_interval` is hit if epoch_idx % self.record_interval == 0: self.states[epoch_idx] = copy.deepcopy(self.net.state_dict()) # record final state self.states["final"] = copy.deepcopy(self.net.state_dict()) return self.net class Test(Experiment): """Test experiment. Parameters ---------- net : `torch.nn.Module` Neural networks that inputs graph representation and outputs parameterized or typed graph for molecular mechanics. data : `espaloma.data.dataset.Dataset` or `torch.utils.data.DataLoader` Dataset. metrics : `List` of `callable` List of loss functions to be used (summed) in training. 
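states : `dict` Mapping from epoch index (or "final") to model state dicts, as recorded by `Train.train`. A usage sketch (illustrative, not part of the original docstring): >>> test = Test(net=net, data=ds_te, states=train.states) >>> test.test() >>> test.results # {metric_name: {state_name: value}}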
""" def __init__( self, net, data, states, metrics=[esp.metrics.TypingCrossEntropy()], normalize=esp.data.normalize.NotNormalize, sampler=None, device=torch.device("cpu"), # it should cpu ): # bookkeeping self.device = device self.net = net.to(self.device) self.data = data self.states = states self.metrics = metrics self.sampler = sampler self.normalize = normalize() def test(self): """Run tests.""" results = {} # loop through the metrics for metric in self.metrics: results[metric.__name__] = {} # NOTE: we are not doing this here since this will lead to OOM # from time to time # make it just one giant graph # g = list(self.data) # g = dgl.batch(g) # g = g.to(self.device) if self.states is None: self.states = {"final": None} for state_name, state in self.states.items(): # loop through states if state is not None: # load the state dict self.net.load_state_dict(state) self.net.eval() for metric in self.metrics: assert isinstance(metric, esp.metrics.Metric) input_fn, target_fn = metric.between inputs = [] targets = [] for g in self.data: with g.local_scope(): g = g.to(self.device) g_input = self.normalize.unnorm(self.net(g)) inputs.append(input_fn(g_input).detach()) targets.append(target_fn(g_input).detach()) inputs = torch.cat(inputs, dim=0) targets = torch.cat(targets, dim=0) # loop through the metrics results[metric.__name__][state_name] = ( metric.base_metric(inputs, targets).detach().cpu().numpy() ) self.ref_g = self.normalize.unnorm(self.net(g)).to( torch.device("cpu") ) for term in self.ref_g.ntypes: for param in self.ref_g.nodes[term].data.keys(): g.nodes[term].data[param] = g.nodes[term].data[param].detach() # point this to self self.results = results class TrainAndTest(Experiment): """Train a model and then test it.""" def __init__( self, net, ds_tr, ds_te, ds_vl=None, metrics_tr=[esp.metrics.TypingCrossEntropy()], metrics_te=[esp.metrics.TypingCrossEntropy()], optimizer=lambda net: torch.optim.Adam(net.parameters(), 1e-2), normalize=esp.data.normalize.NotNormalize, n_epochs=100, record_interval=1, device=torch.device("cpu"), scheduler=None, ): # bookkeeping self.device = device self.net = net self.ds_tr = ds_tr self.ds_te = ds_te self.ds_vl = ds_vl self.optimizer = optimizer self.n_epochs = n_epochs self.metrics_tr = metrics_tr self.metrics_te = metrics_te self.normalize = normalize self.record_interval = record_interval self.scheduler = scheduler def __str__(self): _str = "" _str += "# model" _str += "\n" _str += str(self.net) _str += "\n" if hasattr(self.net, "noise_model"): _str += "# noise model" _str += "\n" _str += str(self.net.noise_model) _str += "\n" _str += "# optimizer" _str += "\n" _str += str(self.optimizer) _str += "\n" _str += "# n_epochs" _str += "\n" _str += str(self.n_epochs) _str += "\n" return _str def run(self): """Run train and test.""" train = Train( net=self.net, data=self.ds_tr, optimizer=self.optimizer, n_epochs=self.n_epochs, metrics=self.metrics_tr, normalize=self.normalize, device=self.device, record_interval=self.record_interval, scheduler=self.scheduler, ) train.train() self.states = train.states test = Test( net=self.net, data=self.ds_te, metrics=self.metrics_te, states=self.states, normalize=self.normalize, device=self.device, ) test.test() self.ref_g_test = test.ref_g self.results_te = test.results test = Test( net=self.net, data=self.ds_tr, metrics=self.metrics_te, states=self.states, normalize=self.normalize, device=self.device, ) test.test() self.ref_g_training = test.ref_g self.results_tr = test.results if self.ds_vl is not None: test = Test( 
net=self.net, data=self.ds_vl, metrics=self.metrics_te, states=self.states, normalize=self.normalize, device=self.device, ) test.test() self.ref_g_validation = test.ref_g self.results_vl = test.results return { "test": self.results_te, "train": self.results_tr, "validate": self.results_vl, } return {"test": self.results_te, "train": self.results_tr} ================================================ FILE: espaloma/app/report.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import pandas as pd # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def dataframe(results_dict): # get all the results metrics = list(list(results_dict.values())[0].keys()) ds_names = list(results_dict.keys()) df = pd.DataFrame( [ [value["final"].round(4) for metric, value in results.items()] for ds_name, results in results_dict.items() ], columns=metrics, index=ds_names, ) return df def curve(results_dict): curve_dict = {} # get all the results metrics = list(list(results_dict.values())[0].keys()) # loop through metrics for idx_metric, metric in enumerate(metrics): # loop through the results for ds_name, results in results_dict.items(): # get all the recorded indices idxs = list( [ key for key in results[metric].keys() if isinstance(key, int) ] ) curve_dict[(metric, ds_name)] = np.array( [results[metric][idx] for idx in idxs] ) return curve_dict def markdown(results_dict): df = dataframe(results_dict) return df.transpose().to_markdown() def visual(results_dict): # make plots less ugly from matplotlib import pyplot as plt plt.rc("font", size=14) plt.rc("lines", linewidth=6) # initialize the figure fig = plt.figure(figsize=(8, 3)) # get all the results metrics = list(list(results_dict.values())[0].keys()) n_metrics = len(metrics) # loop through metrics for idx_metric, metric in enumerate(metrics): ax = plt.subplot(1, n_metrics, idx_metric + 1) # loop through the results for ds_name, results in results_dict.items(): # get all the recorded indices idxs = list( [ key for key in results[metric].keys() if isinstance(key, int) ] ) # sort it ascending idxs.sort() ax.plot( idxs, [results[metric][idx] for idx in idxs], label=ds_name ) ax.set_xlabel("epochs") ax.set_ylabel(metric) plt.tight_layout() plt.legend() return fig def visual_multiple(results_dicts): from matplotlib import cm as cm from matplotlib import pyplot as plt plt.rc("font", size=14) plt.rc("lines", linewidth=4) # initialize the figure fig = plt.figure() # get all the results metrics = list(list(results_dicts[0][1].values())[0].keys()) n_metrics = len(metrics) # loop through metrics for idx_metric, metric in enumerate(metrics): ax = plt.subplot(n_metrics, 1, idx_metric + 1) # loop through results for idx_result, config_and_results_dict in enumerate(results_dicts): config, results_dict = config_and_results_dict for ds_name, results in results_dict.items(): # get all the recorded indices idxs = list( [ key for key in results[metric].keys() if isinstance(key, int) ] ) # sort it ascending idxs.sort() label = None linestyle = "dotted" if ds_name == "training": label = config["#"] linestyle = "solid" ax.plot( idxs, [results[metric][idx] for idx in idxs], label=label, c=cm.gist_rainbow( (float(idx_result) / len(results_dicts)) ), linestyle=linestyle, 
alpha=0.8, ) ax.set_xlabel("epochs") ax.set_ylabel(metric) plt.legend(bbox_to_anchor=(1.04, 0), loc="lower left") plt.tight_layout() return fig def visual_base64(results_dict): fig = visual(results_dict) import base64 import io img = io.BytesIO() fig.savefig(img, format="png", dpi=50) img.seek(0) img = base64.b64encode(img.read()).decode("utf-8") # img = "![img](data:image/png;base64%s)" % img return img def html(results_dict): html_string = "" if isinstance(results_dict, dict): results_dict = [results_dict] for _results_dict in results_dict: html_string += """ <p> <img src="data:image/png;base64,%s"/> <br/> %s </p> """ % ( visual_base64(_results_dict)[:-1], dataframe(_results_dict).to_html(), ) return html_string
def html_multiple_train_and_test(results): html_string = "" for param, result in results: html_string += "<p>" + str(param) + "</p>" html_string += html(result) html_string += "<br/>" return html_string
def html_multiple_train_and_test_2d_grid(results): # make sure there are only two parameter types import copy results = copy.deepcopy(results) for result in results: result[0].pop("#") param_names = list(results[0][0].keys()) assert len(param_names) == 2 param_col_name, param_row_name = param_names param_col_values = list( set([result[0][param_col_name] for result in results]) ) param_row_values = list( set([result[0][param_row_name] for result in results]) ) param_col_values.sort() param_row_values.sort() # initialize giant table in nested lists table = [["NA" for _ in param_col_values] for _ in param_row_values] # populate this table for idx_col, param_col in enumerate(param_col_values): for idx_row, param_row in enumerate(param_row_values): param_dict = { param_col_name: param_col, param_row_name: param_row, } # TODO: # make this less ugly for result in results: if result[0] == param_dict: table[idx_row][idx_col] = html(result[1]) html_string = "" html_string += "<table>" # first row html_string += "<tr>" html_string += ( "<td>" + param_row_name + "/" + param_col_name + "</td>" ) for param_col in param_col_values: html_string += ( "<td>" + str(param_col) + "</td>" ) html_string += "</tr>" # the rest of the rows for idx_row, param_row in enumerate(param_row_values): html_string += "<tr>" html_string += ( "<td>" + param_row + "</td>" ) for idx_col, param_col in enumerate(param_col_values): html_string += ( "<td>" + table[idx_row][idx_col] + "</td>" ) html_string += "</tr>" html_string += "</table>
" return html_string ================================================ FILE: espaloma/app/tests/test_experiment.py ================================================ import pytest import torch def test_import(): import espaloma as esp esp.app.experiment @pytest.fixture def data(): import espaloma as esp esol = esp.data.esol(first=20) # do some typing typing = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") esol.apply(typing, in_place=True) # this modify the original data return esol.view("graph", batch_size=10) @pytest.fixture def net(): import espaloma as esp # define a layer layer = esp.nn.layers.dgl_legacy.gn("GraphConv") # define a representation representation = esp.nn.Sequential( layer, [32, "tanh", 32, "tanh", 32, "tanh"] ) # define a readout readout = esp.nn.readout.node_typing.NodeTyping( in_features=32, n_classes=100 ) # not too many elements here I think? net = torch.nn.Sequential(representation, readout) return net def test_data_and_net(data, net): data net @pytest.fixture def train(data, net): import espaloma as esp train = esp.app.experiment.Train( net=net, data=data, n_epochs=1, metrics=[ esp.metrics.GraphMetric( base_metric=torch.nn.CrossEntropyLoss(), between=["nn_typing", "legacy_typing"], ) ], ) return train def test_train(train): train.train() def test_test(train, net, data): import espaloma as esp train.train() test = esp.app.experiment.Test(net=net, data=data, states=train.states) def test_train_and_test(net, data): import espaloma as esp train_and_test = esp.app.experiment.TrainAndTest( net=net, n_epochs=1, ds_tr=data, ds_te=data ) ================================================ FILE: espaloma/app/train.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import os import numpy as np import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = getattr(forcefield, args.operation) # apply to dataset data = data.apply(operation, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] # readout if args.readout == "node_typing": readout = esp.nn.readout.node_typing.NodeTyping( in_features=units, n_classes=args.n_classes ) if args.readout == "janossy": readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config ) net = torch.nn.Sequential(representation, readout) training_metrics = [ getattr(esp.metrics, metric)() for metric in args.training_metrics ] test_metrics = [ getattr(esp.metrics, metric)() for metric in args.test_metrics ] exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=[ getattr(esp.metrics, metric)() for metric in args.training_metrics ], metrics_te=[ getattr(esp.metrics, metric)() for metric in args.test_metrics ], n_epochs=args.n_epochs, ) results = exp.run() print(esp.app.report.markdown(results)) if __name__ == "__main__": import 
argparse parser = argparse.ArgumentParser() parser.add_argument("--data", default="esol", type=str) parser.add_argument("--first", default=-1, type=int) parser.add_argument("--readout", default="node_typing", type=str) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument("--forcefield", default="gaff-1.81", type=str) parser.add_argument("--operation", default="typing", type=str) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) args = parser.parse_args() run(args) ================================================ FILE: espaloma/app/train_all_params.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import numpy as np import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = forcefield.parametrize # apply to dataset data = data.apply(operation, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config, out_features={ 2: ["k", "eq"], 3: ["k", "eq"], }, ) net = torch.nn.Sequential(representation, readout) metrics_tr = [ esp.metrics.GraphMetric( base_metric=torch.nn.L1Loss(), between=[param, param + "_ref"], level=term, ) for param in ["k", "eq"] for term in ["n2", "n3"] ] metrics_te = [ esp.metrics.GraphMetric( base_metric=base_metric, between=[param, param + "_ref"], level=term, ) for param in ["k", "eq"] for term in ["n2", "n3"] for base_metric in [esp.metrics.rmse, esp.metrics.r2] ] exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, n_epochs=args.n_epochs, ) results = exp.run() print(esp.app.report.markdown(results)) import os os.mkdir(args.out) with open(args.out + "/architecture.txt", "w") as f_handle: f_handle.write(str(exp)) with open(args.out + "/result_table.md", "w") as f_handle: f_handle.write(esp.app.report.markdown(results)) curves = esp.app.report.curve(results) for spec, curve in curves.items(): np.save(args.out + "/" + "_".join(spec) + ".npy", curve) import pickle with open(args.out + "/ref_g_test.th", "wb") as f_handle: pickle.dump(exp.ref_g_test, f_handle) with open(args.out + "/ref_g_training.th", "wb") as f_handle: pickle.dump(exp.ref_g_training, f_handle) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() 
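# Example invocation (illustrative; flag values taken from the argparse defaults declared below):
#   python espaloma/app/train_all_params.py --data alkethoh --first 100 --forcefield smirnoff99Frosst-1.1.0 --n_epochs 50 --out results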
parser.add_argument("--data", default="alkethoh", type=str) parser.add_argument("--out", default="results", type=str) parser.add_argument("--first", default=-1, type=int) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument( "--forcefield", default="smirnoff99Frosst-1.1.0", type=str ) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) args = parser.parse_args() run(args) ================================================ FILE: espaloma/app/train_bonded_energy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import os import numpy as np import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = forcefield.parametrize # apply to dataset data = data.apply(operation, in_place=True) # apply simulation # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation( n_samples=1000, n_steps_per_sample=10 ) data = data.apply(simulation.run, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config, ) net = torch.nn.Sequential( representation, readout, esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(), esp.mm.energy.EnergyInGraph(suffix="_ref"), ) metrics_tr = [ esp.metrics.GraphMetric( base_metric=torch.nn.L1Loss(), between=["u", "u_ref"], level="g" ) ] metrics_te = [ esp.metrics.GraphMetric( base_metric=base_metric, between=[param, param + "_ref"], level=term, ) for param in ["u"] for term in ["g"] for base_metric in [esp.metrics.rmse, esp.metrics.r2] ] optimizer = getattr(torch.optim, args.optimizer)( net.parameters(), lr=args.lr ) exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, n_epochs=args.n_epochs, normalize=esp.data.normalize.PositiveNotNormalize, ) results = exp.run() print(esp.app.report.markdown(results)) import os os.mkdir(args.out) with open(args.out + "/architecture.txt", "w") as f_handle: f_handle.write(str(exp)) with open(args.out + "/result_table.md", "w") as f_handle: f_handle.write(esp.app.report.markdown(results)) curves = esp.app.report.curve(results) for spec, curve in curves.items(): np.save(args.out + "/" + "_".join(spec) + ".npy", curve) import pickle with open(args.out + 
"/ref_g_test.th", "wb") as f_handle: pickle.dump(exp.ref_g_test, f_handle) with open(args.out + "/ref_g_training.th", "wb") as f_handle: pickle.dump(exp.ref_g_training, f_handle) print(esp.app.report.markdown(results)) import pickle with open(args.out + "/ref_g_test.th", "wb") as f_handle: pickle.dump(exp.ref_g_test, f_handle) with open(args.out + "/ref_g_training.th", "wb") as f_handle: pickle.dump(exp.ref_g_training, f_handle) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--data", default="alkethoh", type=str) parser.add_argument("--first", default=-1, type=int) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument( "--forcefield", default="smirnoff99Frosst-1.1.0", type=str ) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--out", default="results", type=str) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) parser.add_argument("--optimizer", default="Adam", type=str) parser.add_argument("--lr", default=1e-3, type=float) args = parser.parse_args() run(args) ================================================ FILE: espaloma/app/train_multi_typing.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import argparse import torch import espaloma as esp def run(args): # define data data = getattr(esp.data, args.data)(first=args.first) # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( args.forcefield ) # param / typing operation = forcefield.multi_typing # apply to dataset data = data.apply(operation, in_place=True) # split partition = [int(x) for x in args.partition.split(":")] ds_tr, ds_te = data.split(partition) # batch ds_tr = ds_tr.view("graph", batch_size=args.batch_size) ds_te = ds_te.view("graph", batch_size=args.batch_size) # layer layer = esp.nn.layers.dgl_legacy.gn(args.layer) # representation representation = esp.nn.Sequential(layer, config=args.config) # get the last bit of units units = [x for x in args.config if isinstance(x, int)][-1] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=args.janossy_config, out_features={ 1: {"nn_typing": 100}, 2: {"nn_typing": 100}, 3: {"nn_typing": 100}, }, ) net = torch.nn.Sequential(representation, readout) metrics_tr = [ esp.metrics.GraphMetric( base_metric=torch.nn.CrossEntropyLoss(), between=["nn_typing", "legacy_typing"], level=term, ) for term in ["n1", "n2", "n3"] ] metrics_te = [ esp.metrics.GraphMetric( base_metric=esp.metrics.accuracy, between=["nn_typing", "legacy_typing"], level=term, ) for term in ["n1", "n2", "n3"] ] exp = esp.TrainAndTest( ds_tr=ds_tr, ds_te=ds_te, net=net, metrics_tr=metrics_tr, metrics_te=metrics_te, n_epochs=args.n_epochs, ) results = exp.run() print(esp.app.report.markdown(results)) if __name__ == "__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("--data", default="alkethoh", type=str) 
parser.add_argument("--first", default=-1, type=int) parser.add_argument("--partition", default="4:1", type=str) parser.add_argument("--batch_size", default=8, type=int) parser.add_argument( "--forcefield", default="smirnoff99Frosst-1.1.0", type=str ) parser.add_argument("--layer", default="GraphConv", type=str) parser.add_argument("--n_classes", default=100, type=int) parser.add_argument( "--config", nargs="*", default=[32, "tanh", 32, "tanh", 32, "tanh"] ) parser.add_argument( "--training_metrics", nargs="*", default=["TypingCrossEntropy"] ) parser.add_argument( "--test_metrics", nargs="*", default=["TypingAccuracy"] ) parser.add_argument("--janossy_config", nargs="*", default=[32, "tanh"]) parser.add_argument("--n_epochs", default=10, type=int) args = parser.parse_args() run(args) ================================================ FILE: espaloma/data/__init__.py ================================================ """ Handles the dataset and collections of espaloma. """ from . import dataset, md, normalize, utils, qcarchive_utils, md17_utils from .collection import * ================================================ FILE: espaloma/data/collection.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= def esol(*args, **kwargs): """ESOL collection. ..[1] ESOL: Estimating Aqueous Solubility Directly from Molecular Structure John S. Delaney Journal of Chemical Information and Computer Sciences 2004 44 (3), 1000-1005 DOI: 10.1021/ci034243x """ import os import pandas as pd path = os.path.dirname(esp.__file__) + "/data/esol.csv" df = pd.read_csv(path) smiles = df.iloc[:, -1] return esp.data.dataset.GraphDataset(smiles, *args, **kwargs) def alkethoh(*args, **kwargs): """AlkEthOH collection. ..[1] Open Force Field Consortium: Escaping atom types using direct chemical perception with SMIRNOFF v0.1 David L. Mobley, Caitlin C. Bannan, Andrea Rizzi, Christopher I. Bayly, John D. Chodera, Victoria T. Lim, Nathan M. Lim, Kyle A. Beauchamp, Michael R. Shirts, Michael K. Gilson, Peter K. Eastman bioRxiv 286542; doi: https://doi.org/10.1101/286542 """ import os import pandas as pd df = pd.concat( [ pd.read_csv( "https://raw.githubusercontent.com/openforcefield/" "open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/" "AlkEthOH_rings.smi", header=None, ), pd.read_csv( "https://raw.githubusercontent.com/openforcefield/" "open-forcefield-data/master/Model-Systems/AlkEthOH_distrib/" "AlkEthOH_chain.smi", header=None, ), ], axis=0, ) smiles = df.iloc[:, 0].values return esp.data.dataset.GraphDataset(smiles, *args, **kwargs) def zinc(first=-1, *args, **kwargs): """ZINC collection. ..[1] Irwin, John J, and Brian K Shoichet. “ZINC --a free database of commercially available compounds for virtual screening.” Journal of chemical information and modeling vol. 45,1 (2005): 177-82.
doi:10.1021/ci049714+ """ import tarfile from os.path import exists from openff.toolkit.topology import Molecule from rdkit import Chem fname = "parm_at_Frosst.tgz" url = "http://www.ccl.net/cca/data/parm_at_Frosst/parm_at_Frosst.tgz" if not exists(fname): import urllib.request urllib.request.urlretrieve(url, fname) archive = tarfile.open(fname) zinc_file = archive.extractfile("parm_at_Frosst/zinc.sdf") _mols = Chem.ForwardSDMolSupplier(zinc_file, removeHs=False) count = 0 gs = [] for mol in _mols: try: gs.append( esp.Graph( Molecule.from_rdkit(mol, allow_undefined_stereo=True) ) ) count += 1 except: pass if first != -1 and count >= first: break return esp.data.dataset.GraphDataset(gs, *args, **kwargs) def md17_old(*args, **kwargs): return [ esp.data.md17_utils.get_molecule(name, *args, **kwargs) for name in [ "benzene", "uracil", "naphthalene", "aspirin", "salicylic", "malonaldehyde", "ethanol", "toluene", "paracetamol", "azobenzene", ] ] def md17_new(*args, **kwargs): return [ esp.data.md17_utils.get_molecule(name, *args, **kwargs).heterograph for name in [ "paracetamol", "azobenzene", "benzene", "ethanol", ] ] class qca(object): pass df_names = [ "Bayer", "Coverage", "eMolecules", "Pfizer", "Roche", "Benchmark", "fda", ] def _get_ds(cls, df_name): import os import pandas as pd path = os.path.dirname(esp.__file__) + "/../data/qca/%s.h5" % df_name df = pd.read_hdf(path) ds = esp.data.qcarchive_utils.h5_to_dataset(df) return ds from functools import partial for df_name in df_names: setattr( qca, df_name.lower(), classmethod(partial(_get_ds, df_name=df_name)), ) ================================================ FILE: espaloma/data/dataset.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class Dataset(abc.ABC, torch.utils.data.Dataset): """The base class of map-style dataset. Parameters ---------- graphs : List objects in the dataset Methods ------- shuffle Randomly shuffle the graphs in the dataset. apply(fn, in_place=True) Apply a function to every graph in the dataset. If `in_place=True`, modify the graph in-place. split(partitions) Split the dataset into partitions subsample(ratio, seed=None) Subsample the dataset. save(path) Save the dataset to a local path. load(path) Load a dataset from local path. Note ---- This also supports iterative-style dataset by deleting `__getitem__` and `__len__` function. Attributes ---------- transforms : an iterable of callables that transforms the input. the `__getiem__` method applies these transforms later. 
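For instance (illustrative), `ds.apply(fn, in_place=False)` appends `fn` to `transforms`, and `__getitem__` then applies it lazily, so `ds[0]` returns `fn` applied to the stored graph.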
Examples -------- >>> data = Dataset([esp.Graph("C")]) """ def __init__(self, graphs=None): super(Dataset, self).__init__() self.graphs = graphs self.transforms = None def __len__(self): # 0 len if no graphs if self.graphs is None: return 0 else: return len(self.graphs) def __getitem__(self, idx): if self.graphs is None: raise RuntimeError("Empty molecule dataset.") if isinstance(idx, int): # sinlge element if self.transforms is None: # when no transform act like list return self.graphs[idx] else: graph = self.graphs[idx] # nested transforms for transform in self.transforms: graph = transform(graph) return graph elif isinstance(idx, slice): # implement slicing if self.transforms is None: # return a Dataset object rather than list return self.__class__(graphs=self.graphs[idx]) else: graphs = [] for graph in self.graphs[idx]: # nested transforms for transform in self.transforms: graph = transform(graph) graphs.append(graph) return self.__class__(graphs=graphs) elif isinstance(idx, list): # implement slicing if self.transforms is None: # return a Dataset object rather than list return self.__class__( graphs=[self.graphs[_idx] for _idx in idx] ) else: graphs = [] for _idx in idx: graph = self[_idx] # nested transforms for transform in self.transforms: graph = transform(graph) graphs.append(graph) return self.__class__(graphs=graphs) def __iter__(self): if self.transforms is None: return iter(self.graphs) else: # TODO: # is this efficient? graphs = iter(self.graphs) for transform in self.transforms: graphs = map(transform, graphs) return graphs def shuffle(self, seed=None): import random from random import shuffle if seed is not None: random.seed(seed) shuffle(self.graphs) return self def apply(self, fn, in_place=False): r"""Apply functions to the elements of the dataset. Parameters ---------- fn : callable Note ---- If in_place is False, `fn` is added to the `transforms` else it is applied to elements and modifies them. """ assert callable(fn) assert isinstance(in_place, bool) if in_place is False: # add to list of transforms if self.transforms is None: self.transforms = [] self.transforms.append(fn) else: # modify in-place # self.graphs = list(map(fn, self.graphs)) _graphs = [] for graph in self.graphs: try: _graphs.append(fn(graph)) except: pass self.graphs = _graphs return self # to allow grammar: ds = ds.apply(...) def split(self, partition): """Split the dataset according to some partition. Parameters ---------- partition : sequence of integers or floats """ n_data = len(self) p_sizes = [] for i, _partition in enumerate(partition): p_size = int((n_data - sum(p_sizes)) * _partition / sum(partition[i:])) p_sizes.append(p_size) assert sum(p_sizes) == n_data, f"{p_sizes}, {sum(p_sizes)}" ds = [] idx = 0 for p_size in p_sizes: ds.append(self[idx : idx + p_size]) idx += p_size return ds def subsample(self, ratio, seed=None): """Subsample the dataset according to some ratio. Parameters ---------- ratio : float Ratio between the size of the subsampled dataset and the original dataset. """ n_data = len(self) idxs = list(range(n_data)) import random random.seed(seed) _idxs = random.choices(idxs, k=int(n_data * ratio)) return self[_idxs] def save(self, path): """Save dataset to path. Parameters ---------- path : path-like object """ import pickle with open(path, "wb") as f_handle: pickle.dump(self.graphs, f_handle) def regenerate_impropers(self, improper_def="smirnoff"): """ Regenerate the improper nodes for all graphs. 
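This applies `espaloma.graphs.utils.regenerate_impropers.regenerate_impropers` to every graph in the dataset; e.g. (illustrative) `ds.regenerate_impropers("smirnoff")` regenerates impropers under the SMIRNOFF convention.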
Parameters ---------- improper_def : str Which convention to use for permuting impropers. """ from espaloma.graphs.utils.regenerate_impropers import ( regenerate_impropers, ) for g in self.graphs: regenerate_impropers(g, improper_def) @classmethod def load(cls, path): """Load path to dataset. Parameters ---------- """ import pickle with open(path, "rb") as f_handle: graphs = pickle.load(f_handle) return cls(graphs) def __add__(self, x): return self.__class__(self.graphs + x.graphs) class GraphDataset(Dataset): """Dataset with additional support for only viewing certain attributes as `torch.utils.data.DataLoader` Methods ------- view(collate_fn, *args, **kwargs) Provide a `torch.utils.data.DataLoader` view of the dataset. Note """ def __init__(self, graphs=[], first=None): super(GraphDataset, self).__init__() from openff.toolkit.topology import Molecule if all( isinstance(graph, Molecule) or isinstance(graph, str) for graph in graphs ): if first is None or first == -1: graphs = [esp.Graph(graph) for graph in graphs] else: graphs = [esp.Graph(graph) for graph in graphs[:first]] self.graphs = graphs @staticmethod def batch(graphs): import dgl if all(isinstance(graph, esp.graphs.graph.Graph) for graph in graphs): return dgl.batch([graph.heterograph for graph in graphs]) elif all(isinstance(graph, dgl.DGLGraph) for graph in graphs): return dgl.batch(graphs) elif all(isinstance(graph, dgl.DGLHeteroGraph) for graph in graphs): return dgl.batch(graphs) else: raise RuntimeError( "Can only batch DGLGraph or DGLHeterograph," "now have %s" % type(graphs[0]) ) def view(self, collate_fn="graph", *args, **kwargs): """Provide a data loader. Parameters ---------- collate_fn : callable or string see `collate_fn` argument for `torch.utils.data.DataLoader` """ if collate_fn == "graph": collate_fn = self.batch elif collate_fn == "homograph": def collate_fn(graphs): graph = self.batch([g.homograph for g in graphs]) return graph elif collate_fn == "graph-typing": def collate_fn(graphs): graph = self.batch(graphs) y = graph.ndata["legacy_typing"] return graph, y elif collate_fn == "graph-typing-loss": loss_fn = torch.nn.CrossEntropyLoss() def collate_fn(graphs): graph = self.batch(graphs) loss = lambda _graph: loss_fn( _graph.ndata["nn_typing"], graph.ndata["legacy_typing"] ) return graph, loss return torch.utils.data.DataLoader( dataset=self, collate_fn=collate_fn, *args, **kwargs ) def save(self, path): import os os.mkdir(path) for idx, graph in enumerate(self.graphs): graph.save(path + "/" + str(idx)) @classmethod def load(cls, path): import os paths = os.listdir(path) paths = [_path for _path in paths] graphs = [] for _path in paths: graphs.append(esp.Graph.load(path + "/" + _path)) return cls(graphs) ================================================ FILE: espaloma/data/md.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import torch from openmmforcefields.generators import SystemGenerator import openmm from openmm import unit from openmm.app import Simulation from openmm.unit import Quantity from espaloma.units import * import espaloma as esp # ============================================================================= # CONSTANTS # ============================================================================= # simulation specs TEMPERATURE = 350 * unit.kelvin STEP_SIZE = 1.0 * unit.femtosecond COLLISION_RATE = 1.0 / 
unit.picosecond EPSILON_MIN = 0.05 * unit.kilojoules_per_mole # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def add_nonbonded_force( g, forcefield="gaff-1.81", add_charges=True, ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], forcefield_kwargs={"constraints": None, "removeCMMotion": False}, ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # get forces forces = list(system.getForces()) # loop through forces for force in forces: name = force.__class__.__name__ # turn off angle if "Angle" in name: for idx in range(force.getNumAngles()): id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) force.updateParametersInContext(simulation.context) elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) force.setBondParameters( idx, id1, id2, length, 0.0, ) force.updateParametersInContext(simulation.context) elif "Torsion" in name: for idx in range(force.getNumTorsions()): ( id1, id2, id3, id4, periodicity, phase, k, ) = force.getTorsionParameters(idx) force.setTorsionParameters( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) force.updateParametersInContext(simulation.context) elif "Nonbonded" in name: if add_charges == False: for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, q * 1e-8, sigma, epsilon) for idx in range(force.getNumExceptions()): ( idx0, idx1, q, sigma, epsilon, ) = force.getExceptionParameters(idx) force.setExceptionParameters( idx, idx0, idx1, q * 1e-8, sigma, epsilon ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # add the energies g.heterograph.apply_nodes( lambda node: {"u": node.data["u"] + energies}, ntype="g", ) return g def get_coulomb_force( g, forcefield="gaff-1.81", ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], forcefield_kwargs={"constraints": None, "removeCMMotion": False}, ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # 
create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # loop through forces forces = list(system.getForces()) for force in forces: name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, q * 1e-8, sigma, epsilon) for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, q * 1e-8, sigma, epsilon ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots new_energies = [] new_derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 new_energies.append(energy) new_derivatives.append(derivative) # put energies to a tensor new_energies = torch.tensor( new_energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] new_derivatives = torch.tensor( np.stack(new_derivatives, axis=1), dtype=torch.get_default_dtype(), ) return energies - new_energies, derivatives - new_derivatives def subtract_coulomb_force( g, forcefield="gaff-1.81", ): delta_energies, delta_derivatives = get_coulomb_force( g, forcefield=forcefield ) # subtract the energies g.heterograph.apply_nodes( lambda node: {"u_ref": node.data["u_ref"] - delta_energies}, ntype="g", ) if "u_ref_prime" in g.nodes["n1"].data: g.heterograph.apply_nodes( lambda node: { "u_ref_prime": node.data["u_ref_prime"] - delta_derivatives }, ntype="n1", ) return g def subtract_nonbonded_force( g, forcefield="gaff-1.81", subtract_charges=True, ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], forcefield_kwargs={"constraints": None, "removeCMMotion": False}, ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # get forces forces = list(system.getForces()) # loop through forces for force in forces: name = force.__class__.__name__ # turn off angle if "Angle" 
in name: for idx in range(force.getNumAngles()): id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) force.updateParametersInContext(simulation.context) elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) force.setBondParameters( idx, id1, id2, length, 0.0, ) force.updateParametersInContext(simulation.context) elif "Torsion" in name: for idx in range(force.getNumTorsions()): ( id1, id2, id3, id4, periodicity, phase, k, ) = force.getTorsionParameters(idx) force.setTorsionParameters( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) force.updateParametersInContext(simulation.context) elif "Nonbonded" in name: # only handle LJ potentials here; # the Coulomb interaction is subtracted separately with the NoCutoff method if subtract_charges=True for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, q * 1e-8, sigma, epsilon) for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, q * 1e-8, sigma, epsilon ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # subtract the energies g.heterograph.apply_nodes( lambda node: {"u_ref": node.data["u_ref"] - energies}, ntype="g", ) if "u_ref_prime" in g.nodes["n1"].data: g.heterograph.apply_nodes( lambda node: { "u_ref_prime": node.data["u_ref_prime"] - derivatives }, ntype="n1", ) if subtract_charges: g = subtract_coulomb_force(g) return g def subtract_nonbonded_force_except_14( g, forcefield="gaff-1.81", ): # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=forcefield, molecules=[g.mol], ) # create openmm system system = generator.create_system( topology, ) # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) # get forces forces = list(system.getForces()) # loop through forces for force in forces: name = force.__class__.__name__ # turn off angle if "Angle" in name: for idx in range(force.getNumAngles()): id1, id2, id3, angle, k = force.getAngleParameters(idx) force.setAngleParameters(idx, id1, id2, id3, angle, 0.0) force.updateParametersInContext(simulation.context) elif "Bond" in name: for idx in range(force.getNumBonds()): id1, id2, length, k = force.getBondParameters(idx) force.setBondParameters( idx, id1, id2, length, 0.0, ) force.updateParametersInContext(simulation.context) elif "Torsion" in name: for idx in range(force.getNumTorsions()): 
( id1, id2, id3, id4, periodicity, phase, k, ) = force.getTorsionParameters(idx) force.setTorsionParameters( idx, id1, id2, id3, id4, periodicity, phase, 0.0, ) force.updateParametersInContext(simulation.context) elif "Nonbonded" in name: for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, q, sigma, epsilon * 1e-8 ) force.updateParametersInContext(simulation.context) # the snapshots xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) # loop through the snapshots energies = [] derivatives = [] for x in xs: simulation.context.setPositions(x) state = simulation.context.getState( getEnergy=True, getParameters=True, getForces=True, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT, ) derivative = state.getForces(asNumpy=True).value_in_unit( esp.units.FORCE_UNIT, ) * -1 energies.append(energy) derivatives.append(derivative) # put energies to a tensor energies = torch.tensor( energies, dtype=torch.get_default_dtype(), ).flatten()[None, :] derivatives = torch.tensor( np.stack(derivatives, axis=1), dtype=torch.get_default_dtype(), ) # subtract the energies g.heterograph.apply_nodes( lambda node: {"u_ref": node.data["u_ref"] - energies}, ntype="g", ) if "u_ref_prime" in g.nodes["n1"].data: g.heterograph.apply_nodes( lambda node: { "u_ref_prime": node.data["u_ref_prime"] - derivatives }, ntype="n1", ) return g # ============================================================================= # MODULE CLASSES # ============================================================================= class MoleculeVacuumSimulation(object): """Simulate a single molecule system in vacuum. Parameters ---------- forcefield : `str` Name of the small molecule force field used to parameterize the system. n_samples : `int` Number of samples to collect. n_conformers : `int` Number of initial conformers to generate. n_steps_per_sample : `int` Number of steps between each sample. temperature : `float * unit.kelvin` Temperature for the simulation. collision_rate : `float / unit.picosecond` Collision rate. step_size : `float * unit.femtosecond` Time step. charge_method : `str` or `None` Partial charge method passed to `Molecule.assign_partial_charges`; if `None`, no charges are assigned here. Methods ------- simulation_from_graph : Create simulation from molecule. run : Run the simulation. 
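Examples
--------
A minimal sketch, mirroring the pattern in
``espaloma/data/tests/test_md.py`` (assumes the default ``gaff-1.81``
force field is available):

>>> import espaloma as esp
>>> g = esp.Graph("c1ccccc1")
>>> simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10)
>>> samples = simulation.run(g, in_place=False)
>>> tuple(samples.shape)
(10, 12, 3)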
""" def __init__( self, forcefield="gaff-1.81", n_samples=100, n_conformers=10, n_steps_per_sample=1000, temperature=TEMPERATURE, collision_rate=COLLISION_RATE, step_size=STEP_SIZE, charge_method=None, ): self.n_samples = n_samples self.n_steps_per_sample = n_steps_per_sample self.temperature = temperature self.collision_rate = collision_rate self.step_size = step_size self.forcefield = forcefield self.n_conformers = n_conformers self.charge_method = charge_method def simulation_from_graph(self, g): """Create simulation from moleucle""" # assign partial charge if self.charge_method is not None: g.mol.assign_partial_charges(self.charge_method) # parameterize topology topology = g.mol.to_topology().to_openmm() generator = SystemGenerator( small_molecule_forcefield=self.forcefield, molecules=[g.mol], ) # create openmm system system = generator.create_system( topology, ) # set epsilon minimum to 0.05 kJ/mol for force in system.getForces(): if "Nonbonded" in force.__class__.__name__: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) for particle_index in range(force.getNumParticles()): charge, sigma, epsilon = force.getParticleParameters( particle_index ) if epsilon < EPSILON_MIN: force.setParticleParameters( particle_index, charge, sigma, EPSILON_MIN ) # use langevin integrator integrator = openmm.LangevinIntegrator( self.temperature, self.collision_rate, self.step_size ) # initialize simulation simulation = Simulation( topology=topology, system=system, integrator=integrator, platform=openmm.Platform.getPlatformByName("Reference"), ) return simulation def run(self, g, in_place=True): """Collect samples from simulation. Parameters ---------- g : `esp.Graph` Input graph. in_place : `bool` If ture, Returns ------- samples : `torch.Tensor`, `shape=(n_samples, n_nodes, 3)` `in_place=True` Sample. graph : `esp.Graph` Modified graph. 
""" # build simulation simulation = self.simulation_from_graph(g) import openff.toolkit # get conformer g.mol.generate_conformers( toolkit_registry=openff.toolkit.utils.RDKitToolkitWrapper(), n_conformers=self.n_conformers, ) # get number of actual conformers true_n_conformers = len(g.mol.conformers) samples = [] for idx in range(true_n_conformers): # put conformer in simulation simulation.context.setPositions(g.mol.conformers[idx].to_openmm()) # set velocities simulation.context.setVelocitiesToTemperature(self.temperature) # minimize simulation.minimizeEnergy() # loop through number of samples for _ in range(self.n_samples // self.n_conformers): # run MD for `self.n_steps_per_sample` steps simulation.step(self.n_steps_per_sample) # append samples to `samples` samples.append( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(DISTANCE_UNIT) ) # if the `samples` array is not filled, # pick a random conformer to do it again if len(samples) < self.n_samples: len_samples = len(samples) import random idx = random.choice(list(range(true_n_conformers))) simulation.context.setPositions(g.mol.conformers[idx].to_openmm()) # set velocities simulation.context.setVelocitiesToTemperature(self.temperature) # minimize simulation.minimizeEnergy() # loop through number of samples for _ in range(self.n_samples - len_samples): # run MD for `self.n_steps_per_sample` steps simulation.step(self.n_steps_per_sample) # append samples to `samples` samples.append( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(DISTANCE_UNIT) ) assert len(samples) == self.n_samples # put samples into an array samples = np.array(samples) # put samples into tensor samples = torch.tensor(samples, dtype=torch.float32) if in_place is True: g.heterograph.nodes["n1"].data["xyz"] = samples.permute(1, 0, 2) # require gradient for force matching g.heterograph.nodes["n1"].data["xyz"].requires_grad = True return g return samples ================================================ FILE: espaloma/data/md17_utils.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import torch import espaloma as esp from openmm import unit from openmm.unit import Quantity # ============================================================================= # CONSTANTS # ============================================================================= MOLECULES = { "benzene": "C1=CC=CC=C1", "uracil": "O=C1NC=CC(=O)N1", "naphthalene": "C1=CC=C2C=CC=CC2=C1", "aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O", "salicylic": "C1=CC=C(C(=C1)C(=O)O)O", "malonaldehyde": "C(C=O)C=O", "ethanol": "CCO", "toluene": "CC1=CC=CC=C1", "paracetamol": "CC(=O)NC1=CC=C(C=C1)O", "azobenzene": "C1=CC=C(C=C1)N=NC2=CC=CC=C2", } OFFSETS = { 1: -0.500607632585, 6: -37.8302333826, 7: -54.5680045287, 8: -75.0362229210, } # ============================================================================== # UTILITY FUNCTIONS # ============================================================================== def sum_offsets(elements): return sum([OFFSETS[element] for element in elements]) def realize_molecule( data, name, smiles=None, first=-1, subtract_nonbonded=True ): elements = data["z"].tolist() offset = sum_offsets(elements) g = esp.data.utils.infer_mol_from_coordinates( data["R"][0], elements, smiles, ) g.nodes["n1"].data["xyz"] = torch.tensor( Quantity( 
data["R"].transpose(1, 0, 2), unit.angstrom, ).value_in_unit(esp.units.DISTANCE_UNIT), requires_grad=True, )[:, :first, :] g.nodes["g"].data["u_ref"] = ( torch.tensor( Quantity( data["E"], unit.kilocalorie_per_mole, ).value_in_unit(esp.units.ENERGY_UNIT) ).transpose(1, 0)[:, :first] - offset ) g.nodes["n1"].data["u_ref_prime"] = torch.tensor( Quantity( data["F"], unit.kilocalorie_per_mole / unit.angstrom, ).value_in_unit(esp.units.FORCE_UNIT) ).transpose(1, 0)[:, :first, :] if subtract_nonbonded is True: g = esp.data.md.subtract_nonbonded_force(g) return g def get_molecule(name, *args, **kwargs): if name == "benzene": file_name = "benzene_old_dft.npz" else: file_name = "%s_dft.npz" % name from os.path import exists if not exists(file_name): url = "http://www.quantum-machine.org/gdml/data/npz/%s" % file_name print(url) import urllib.request urllib.request.urlretrieve(url, file_name) data = np.load(file_name) smiles = MOLECULES[name] g = realize_molecule(data, name, smiles, *args, **kwargs) return g ================================================ FILE: espaloma/data/normalize.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import torch import espaloma as esp # ============================================================================= # BASE CLASSES # ============================================================================= class BaseNormalize(abc.ABC): """Base class for normalizing operation.""" def __init__(self): super(BaseNormalize, self).__init__() @abc.abstractmethod def _prepare(self): # NOTE: # `_norm` and `_unnorm` are assigned here raise NotImplementedError # ============================================================================= # MODULE CLASSES # ============================================================================= class DatasetNormalNormalize(BaseNormalize): """Normalizing operation based on a dataset of molecules, assuming parameters having normal distribution. Parameters ---------- dataset : `espaloma.data.dataset.Dataset` The dataset we base on to calculate the statistics of parameter distributions. 
Attributes ---------- norm : normalize function unnorm : unnormalize function """ def __init__(self, dataset): super(DatasetNormalNormalize, self).__init__() self.dataset = dataset self._prepare() def _prepare(self): """Calculate the statistics from dataset""" # grab the collection of graphs in the dataset, batched g = self.dataset.batch(self.dataset.graphs) self.statistics = {term: {} for term in ["n1", "n2", "n3", "n4"]} # calculate statistics for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[term].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue self.statistics[term][ key.replace("_ref", "_mean") ] = torch.mean(g.nodes[term].data[key], axis=0) self.statistics[term][ key.replace("_ref", "_std") ] = torch.std(g.nodes[term].data[key], axis=0) # get normalize and unnormalize functions def norm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue g.nodes[term].data[key] = ( g.nodes[term].data[key] - self.statistics[term][key.replace("_ref", "_mean")] ) / self.statistics[term][key.replace("_ref", "_std")] return g def unnorm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if key + "_mean" in self.statistics[term]: g.nodes[term].data[key] = ( g.nodes[term].data[key] * self.statistics[term][key + "_std"] + self.statistics[term][key + "_mean"] ) # # elif '_ref' in key \ # and key.replace('_ref', '_mean')\ # in self.statistics[term]: # # g.nodes[term].data[key]\ # = g.nodes[term].data[key]\ # * self.statistics[term][ # key.replace('_ref', '_std')]\ # + self.statistics[term][ # key.replace('_ref', '_mean')] return g # point normalize and unnormalize functions to `self` self.norm = norm self.unnorm = unnorm class DatasetLogNormalNormalize(BaseNormalize): """Normalizing operation based on a dataset of molecules, assuming parameters having log normal distribution. Parameters ---------- dataset : `espaloma.data.dataset.Dataset` The dataset we base on to calculate the statistics of parameter distributions. 
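Statistics are computed on the logarithm of each parameter, so all parameters are assumed to be positive.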
Attributes ---------- norm : normalize function unnorm : unnormalize function """ def __init__(self, dataset): super(DatasetLogNormalNormalize, self).__init__() self.dataset = dataset self._prepare() def _prepare(self): """Calculate the statistics from dataset""" # grab the collection of graphs in the dataset, batched g = self.dataset.batch(self.dataset.graphs) self.statistics = {term: {} for term in ["n1", "n2", "n3", "n4"]} # calculate statistics for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[term].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue self.statistics[term][ key.replace("_ref", "_mean") ] = torch.mean(g.nodes[term].data[key].log(), axis=0) self.statistics[term][ key.replace("_ref", "_std") ] = torch.std(g.nodes[term].data[key].log(), axis=0) # get normalize and unnormalize functions def norm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue g.nodes[term].data[key] = ( g.nodes[term].data[key].log() - self.statistics[term][key.replace("_ref", "_mean")] ) / self.statistics[term][key.replace("_ref", "_std")] return g def unnorm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if key + "_mean" in self.statistics[term]: g.nodes[term].data[key] = torch.exp( g.nodes[term].data[key] * self.statistics[term][key + "_std"].to( g.nodes[term].data[key].device ) + self.statistics[term][key + "_mean"].to( g.nodes[term].data[key].device ) ) # # elif '_ref' in key \ # and key.replace('_ref', '_mean')\ # in self.statistics[term]: # # g.nodes[term].data[key]\ # = torch.exp( # g.nodes[term].data[key]\ # * self.statistics[term][ # key.replace('_ref', '_std')]\ # + self.statistics[term][ # key.replace('_ref', '_mean')]) return g # point normalize and unnormalize functions to `self` self.norm = norm self.unnorm = unnorm # ============================================================================= # PRESETS # ============================================================================= class ESOL100NormalNormalize(DatasetNormalNormalize): def __init__(self): super(ESOL100NormalNormalize, self).__init__( dataset=esp.data.esol(first=100).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) class ESOL100LogNormalNormalize(DatasetLogNormalNormalize): def __init__(self): super(ESOL100LogNormalNormalize, self).__init__( dataset=esp.data.esol(first=100).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) class NotNormalize(BaseNormalize): def __init__(self): super(NotNormalize, self).__init__() self._prepare() def _prepare(self): self.norm = lambda x: x self.unnorm = lambda x: x class PositiveNotNormalize(BaseNormalize): def __init__(self): super(PositiveNotNormalize, self).__init__() self._prepare() def _prepare(self): # get normalize and unnormalize functions def norm(g): for term in ["n1", "n2", "n3", "n4"]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if not key.endswith("ref"): # pass non-parameters continue g.nodes[term].data[key] = g.nodes[term].data[key].log() return g def unnorm(g): for term in [ "n2", "n3", ]: # loop through terms for key in g.nodes[ term ].data.keys(): # loop through parameters if key == "k" or key == "eq": g.nodes[term].data[key] 
= torch.exp( g.nodes[term].data[key] ) return g # point normalize and unnormalize functions to `self` self.norm = norm self.unnorm = unnorm ================================================ FILE: espaloma/data/off-mol_0_10_6.json ================================================ "{\"name\": \"\", \"atoms\": [{\"atomic_number\": 8, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 7, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 16, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 17, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 6, \"formal_charge\": 0, \"is_aromatic\": true, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": 
null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}, {\"atomic_number\": 1, \"formal_charge\": 0, \"is_aromatic\": false, \"stereochemistry\": null, \"name\": \"\"}], \"virtual_sites\": [], \"bonds\": [{\"atom1\": 0, \"atom2\": 1, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 1, \"atom2\": 2, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 2, \"atom2\": 3, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 3, \"atom2\": 4, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 4, \"atom2\": 5, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 5, \"atom2\": 6, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 6, \"atom2\": 7, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 7, \"atom2\": 8, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 8, \"atom2\": 9, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 9, \"atom2\": 10, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 10, \"atom2\": 11, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 1, \"atom2\": 12, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 12, \"atom2\": 13, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 13, \"atom2\": 14, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 14, \"atom2\": 15, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 15, \"atom2\": 16, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, 
{\"atom1\": 16, \"atom2\": 17, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 17, \"atom2\": 18, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 12, \"atom2\": 19, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 19, \"atom2\": 20, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 20, \"atom2\": 21, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 21, \"atom2\": 22, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 22, \"atom2\": 23, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 22, \"atom2\": 24, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 24, \"atom2\": 25, \"bond_order\": 2, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 11, \"atom2\": 3, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 18, \"atom2\": 14, \"bond_order\": 2, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 25, \"atom2\": 19, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 11, \"atom2\": 6, \"bond_order\": 1, \"is_aromatic\": true, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 2, \"atom2\": 26, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 2, \"atom2\": 27, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 7, \"atom2\": 28, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 8, \"atom2\": 29, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 9, \"atom2\": 30, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 10, \"atom2\": 31, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 13, \"atom2\": 32, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 13, \"atom2\": 33, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 15, \"atom2\": 34, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 16, \"atom2\": 35, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 18, \"atom2\": 36, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 20, \"atom2\": 37, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 21, \"atom2\": 38, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 24, \"atom2\": 39, \"bond_order\": 1, 
\"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}, {\"atom1\": 25, \"atom2\": 40, \"bond_order\": 1, \"is_aromatic\": false, \"stereochemistry\": null, \"fractional_bond_order\": null}], \"properties\": {}, \"conformers\": null, \"partial_charges\": null, \"partial_charges_unit\": null}" ================================================ FILE: espaloma/data/qcarchive_utils.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= from collections import namedtuple from typing import Tuple import numpy as np import qcportal import torch from openmm import unit from openmm.unit import Quantity import espaloma as esp # ============================================================================= # CONSTANTS # ============================================================================= # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def get_client(url: str = "api.qcarchive.molssi.org") -> qcportal.client.PortalClient: """ Returns a instance of the qcportal client. Parameters ---------- url: str, default="api.qcarchive.molssi.org" qcportal instance to connect Returns ------- qcportal.client.PortalClient qcportal client instance. """ # Note, this may need to be modified to include username/password for non-public servers return qcportal.PortalClient(url) def get_collection( client, collection_type="optimization", name="OpenFF Full Optimization Benchmark 1", ): """ Connects to a specific dataset on qcportal Parameters ---------- client: qcportal.client, required The qcportal client instance collection_type: str, default="optimization" The type of qcarchive collection, options are "torsiondrive", "optimization", "gridoptimization", "reaction", "singlepoint" "manybody" name: str, default="OpenFF Full Optimization Benchmark 1" Name of the dataset Returns ------- (qcportal dataset, list(str)) Tuple with an instance of qcportal dataset and list of record names """ collection = client.get_dataset( dataset_type=collection_type, dataset_name=name, ) record_names = collection.entry_names return collection, record_names def process_record(record, entry): """ Processes a given record/entry pair from a dataset and returns the graph Parameters ---------- record: qcportal.optimization.record_models.OptimizationRecord qcportal record entry: cportal.optimization.dataset_models.OptimizationDatasetEntry qcportal entry Returns ------- esp.Graph """ from openff.toolkit.topology import Molecule if record.record_type == "optimization": trajectory = record.trajectory if trajectory is None: return None else: raise Exception( f"{record.record_type} is not supported: only optimization datasets can be processed." 
) mol = Molecule.from_qcschema(entry.dict()) g = esp.Graph(mol) # energy is already hartree g.nodes["g"].data["u_ref"] = torch.tensor( [ Quantity( snapshot.properties["scf_total_energy"], esp.units.HARTREE_PER_PARTICLE, ).value_in_unit(esp.units.ENERGY_UNIT) for snapshot in trajectory ], dtype=torch.get_default_dtype(), )[None, :] g.nodes["n1"].data["xyz"] = torch.tensor( np.stack( [ Quantity( snapshot.molecule.geometry, unit.bohr, ).value_in_unit(esp.units.DISTANCE_UNIT) for snapshot in trajectory ], axis=1, ), requires_grad=True, dtype=torch.get_default_dtype(), ) g.nodes["n1"].data["u_ref_prime"] = torch.stack( [ torch.tensor( Quantity( np.array(snapshot.properties["return_result"]).reshape((-1, 3)), esp.units.HARTREE_PER_PARTICLE / unit.bohr, ).value_in_unit(esp.units.FORCE_UNIT), dtype=torch.get_default_dtype(), ) for snapshot in trajectory ], dim=1, ) return g def get_graph(collection, record_name, spec_name="default"): """ Processes the qcportal data for a given record name. Only optimization datasets are supported. Parameters ---------- collection, qcportal dataset, required The instance of the qcportal dataset record_name, str, required The name of a given record spec_name, str, default="default" Retrieve data for a given qcportal specification. Returns ------- Graph """ # get record and trajectory record = collection.get_record(record_name, specification_name=spec_name) entry = collection.get_entry(record_name) g = process_record(record, entry) return g def get_graphs(collection, record_names, spec_name="default"): """ Processes the qcportal data for a given set of record names. This uses the qcportal iteration functions which are faster than processing records one at a time. Only optimization datasets are supported. Parameters ---------- collection, qcportal dataset, required The instance of the qcportal dataset record_names, List[str], required A list of record names to process spec_name, str, default="default" Retrieve data for a given qcportal specification. Returns ------- list(graph) Returns a list of the corresponding graphs for each record name """ g_list = [] for record, entry in zip( collection.iterate_records(record_names, specification_names=[spec_name]), collection.iterate_entries(record_names), ): # note iterate records returns a tuple of length 3 (name, spec_name, actual record information) g = process_record(record[2], entry) g_list.append(g) return g_list def fetch_td_record(record: qcportal.torsiondrive.record_models.TorsiondriveRecord): """ Fetches configuration, energy, and gradients for a given torsiondrive record as a function of different angles. Parameters ---------- record: qcportal.torsiondrive.record_models.TorsiondriveRecord, required Torsiondrive record of interest Returns ------- tuple, (numpy.array, numpy.array, numpy.array, numpy.array) Returned data is a tuple of numpy arrays. The first array contains the angles; the subsequent arrays contain the molecular coordinates, energies, and gradients associated with each angle. """ molecule_optimization = record.optimizations angle_keys = list(molecule_optimization.keys()) xyzs = [] energies = [] gradients = [] for angle in angle_keys: # NOTE: this is calling the first index of the optimization array # this gives the same value as the prior implementation. # however it seems that this contains multiple different initial configurations # that have been optimized. Should all conformers and energies/gradients be considered? 
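# for now, keep only the first optimization for each angle and read the final (optimized) frame of its trajectory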
mol = molecule_optimization[angle][0].final_molecule result = molecule_optimization[angle][0].trajectory[-1].properties """Note: force = - gradient""" # TODO: attach units here? or later? e = result["current energy"] g = np.array(result["current gradient"]).reshape(-1, 3) xyzs.append(mol.geometry) energies.append(e) gradients.append(g) # to arrays xyz = np.array(xyzs) energies = np.array(energies) gradients = np.array(gradients) # assume each angle key is a tuple -- sort by first angle in tuple # NOTE: (for now making the assumption that these torsion drives are 1D) for k in angle_keys: assert len(k) == 1 to_ordered = np.argsort([k[0] for k in angle_keys]) angles_in_order = [angle_keys[i_] for i_ in to_ordered] flat_angles = np.array(angles_in_order).flatten() # put the xyz's, energies, and gradients in the same order as the angles xyz_in_order = xyz[to_ordered] energies_in_order = energies[to_ordered] gradients_in_order = gradients[to_ordered] # TODO: put this return blob into a better struct return flat_angles, xyz_in_order, energies_in_order, gradients_in_order MolWithTargets = namedtuple( "MolWithTargets", ["offmol", "xyz", "energies", "gradients"] ) def h5_to_dataset(df): def get_smiles(x): try: return x["offmol"].to_smiles() except: return np.nan df["smiles"] = df.apply(get_smiles, axis=1) df = df.dropna() groups = df.groupby("smiles") gs = [] for name, group in groups: mol_ref = group["offmol"][0] assert all(mol_ref == entry for entry in group["offmol"]) g = esp.Graph(mol_ref) u_ref = np.concatenate(group["energies"].values) u_ref_prime = np.concatenate(group["gradients"].values, axis=0).transpose( 1, 0, 2 ) xyz = np.concatenate(group["xyz"].values, axis=0).transpose(1, 0, 2) assert u_ref_prime.shape[0] == xyz.shape[0] == mol_ref.n_atoms assert u_ref.shape[0] == u_ref_prime.shape[1] == xyz.shape[1] # energy is already hartree g.nodes["g"].data["u_ref"] = torch.tensor( Quantity(u_ref, esp.units.HARTREE_PER_PARTICLE).value_in_unit( esp.units.ENERGY_UNIT ), dtype=torch.get_default_dtype(), )[None, :] g.nodes["n1"].data["xyz"] = torch.tensor( Quantity( xyz, unit.bohr, ).value_in_unit(esp.units.DISTANCE_UNIT), requires_grad=True, dtype=torch.get_default_dtype(), ) g.nodes["n1"].data["u_ref_prime"] = torch.tensor( Quantity( u_ref_prime, esp.units.HARTREE_PER_PARTICLE / unit.bohr, ).value_in_unit(esp.units.FORCE_UNIT), dtype=torch.get_default_dtype(), ) gs.append(g) return esp.data.dataset.GraphDataset(gs) def breakdown_along_time_axis(g, batch_size=32): n_snapshots = g.nodes["g"].data["u_ref"].flatten().shape[0] idxs = list(range(n_snapshots)) from random import shuffle shuffle(idxs) chunks = [ idxs[_idx * batch_size: (_idx + 1) * batch_size] for _idx in range(n_snapshots // batch_size) ] _gs = [] for chunk in chunks: _g = esp.Graph(g.mol) _g.nodes["g"].data["u_ref"] = ( g.nodes["g"].data["u_ref"][:, chunk].detach().clone() ) _g.nodes["n1"].data["xyz"] = ( g.nodes["n1"].data["xyz"][:, chunk, :].detach().clone() ) _g.nodes["n1"].data["u_ref_prime"] = ( g.nodes["n1"].data["u_ref_prime"][:, chunk, :].detach().clone() ) _g.nodes["n1"].data["xyz"].requires_grad = True _gs.append(_g) return _gs def make_batch_size_consistent(ds, batch_size=32): import itertools return esp.data.dataset.GraphDataset( list( itertools.chain.from_iterable( [breakdown_along_time_axis(g, batch_size=batch_size) for g in ds] ) ) ) def weight_by_snapshots(g, key="weight"): n_snapshots = g.nodes["n1"].data["xyz"].shape[1] g.nodes["g"].data[key] = torch.tensor(float(1.0 / n_snapshots))[None, :] 
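# =============================================================================
# EXAMPLE
# =============================================================================
# A minimal usage sketch (mirrors espaloma/data/tests/test_qcarchive.py;
# requires network access to the public QCArchive server, and the record
# index below is only illustrative):
#
#     from espaloma.data import qcarchive_utils
#
#     client = qcarchive_utils.get_client()
#     collection, record_names = qcarchive_utils.get_collection(client)
#     g = qcarchive_utils.get_graph(collection, record_names[0])
#     print(g.nodes["g"].data["u_ref"].shape)  # (1, n_snapshots)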
================================================ FILE: espaloma/data/tests/test_collection.py ================================================ import pytest @pytest.fixture def esol(): import espaloma as esp return esp.data.esol(first=16) def test_view(esol): view = esol.view(batch_size=4) import dgl graphs = list(view) assert len(graphs) == 4 assert all(isinstance(graph, dgl.DGLHeteroGraph) for graph in graphs) def test_typing(esol): import espaloma as esp typing = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") esol = esol.apply(typing, in_place=True) view = esol.view(batch_size=4) for g in view: assert g.nodes["n1"].data["legacy_typing"].shape[ 0 ] == g.number_of_nodes(ntype="n1") ================================================ FILE: espaloma/data/tests/test_dataset.py ================================================ import pytest def test_tiny_dataset(): import espaloma as esp xs = list(range(5)) ds = esp.data.dataset.Dataset(xs) @pytest.fixture def ds(): xs = list(range(5)) import espaloma as esp return esp.data.dataset.Dataset(xs) def test_get(ds): assert ds[0] == 0 def test_len(ds): assert len(ds) == 5 def test_iter(ds): assert all(x == x_ for (x, x_) in zip(ds, range(5))) def test_slice(ds): import espaloma as esp sub_ds = ds[:2] assert isinstance(ds, esp.data.dataset.Dataset) assert len(sub_ds) == 2 def test_split(ds): a, b = ds.split([1, 4]) assert len(a) == 1 assert len(b) == 4 @pytest.fixture def ds_new(ds): fn = lambda x: x + 1 return ds.apply(fn) def test_no_change(ds_new): assert all(x == x_ for (x, x_) in zip(ds_new.graphs, range(5))) def test_get_new(ds_new): assert ds_new[0] == 1 def test_len_new(ds_new): assert len(ds_new) == 5 def test_iter_new(ds_new): assert all(x == x_ + 1 for (x, x_) in zip(ds_new, range(5))) @pytest.fixture def ds_newer(ds): fn = lambda x: x + 1 return ds.apply(fn).apply(fn) def test_iter_newer(ds_newer): assert all(x == x_ + 2 for (x, x_) in zip(ds_newer, range(5))) def test_no_return(ds): fn = lambda x: x + 1 ds.apply(fn).apply(fn) assert all(x == x_ + 2 for (x, x_) in zip(ds, range(5))) def test_subsample(ds): _ds = ds.subsample(0.2) assert len(_ds) == 1 ================================================ FILE: espaloma/data/tests/test_md.py ================================================ import pytest import torch def test_init(): import espaloma.data.md @pytest.fixture def graph(): import espaloma as esp graph = esp.Graph("c1ccccc1") return graph @pytest.fixture def ds(): import espaloma as esp ds = esp.data.esol(first=10) return ds def test_system(graph): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation() def test_run(graph): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) samples = simulation.run(graph, in_place=False) assert samples.shape == torch.Size([10, 12, 3]) def test_run_in_place(graph): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) graph = simulation.run(graph, in_place=True) assert graph.nodes["n1"].data["xyz"].shape == torch.Size([12, 10, 3]) def test_apply(ds): from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=1 ).run ds.apply(simulation, in_place=True) assert ds.graphs[0].nodes["n1"].data["xyz"].shape[-1] == 3 assert ds.graphs[0].nodes["n1"].data["xyz"].shape[-2] == 1 ================================================ FILE: 
espaloma/data/tests/test_normalize.py ================================================ import numpy.testing as npt import pytest def test_import(): from espaloma.data.normalize import BaseNormalize def test_normalize_esol(): import espaloma as esp normalize = esp.data.normalize.DatasetNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) def test_log_normalize_esol(): import espaloma as esp normalize = esp.data.normalize.DatasetLogNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) def test_normal_normalize_reproduce(): import espaloma as esp normalize = esp.data.normalize.DatasetNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) esol = esp.data.esol(first=1) # do some typing param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize esol.apply(param, in_place=True) # this modify the original data g = esol[0] import copy g_ = copy.deepcopy(g) g = normalize.norm(g) g.nodes["n2"].data["k"] = g.nodes["n2"].data["k_ref"] g.nodes["n2"].data["eq"] = g.nodes["n2"].data["eq_ref"] g = normalize.unnorm(g) npt.assert_almost_equal( g.nodes["n2"].data["k"].detach().numpy(), g_.nodes["n2"].data["k_ref"].detach().numpy(), ) npt.assert_almost_equal( g.nodes["n2"].data["eq"].detach().numpy(), g_.nodes["n2"].data["eq_ref"].detach().numpy(), ) def test_log_normal_normalize_reproduce(): import espaloma as esp normalize = esp.data.normalize.DatasetLogNormalNormalize( dataset=esp.data.esol(first=10).apply( esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize, in_place=True, ) ) esol = esp.data.esol(first=1) # do some typing param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize esol.apply(param, in_place=True) # this modify the original data g = esol[0] import copy g_ = copy.deepcopy(g) g = normalize.norm(g) g.nodes["n2"].data["k"] = g.nodes["n2"].data["k_ref"] g.nodes["n2"].data["eq"] = g.nodes["n2"].data["eq_ref"] g = normalize.unnorm(g) npt.assert_almost_equal( g.nodes["n2"].data["k"].detach().numpy(), g_.nodes["n2"].data["k_ref"].detach().numpy(), decimal=1, ) npt.assert_almost_equal( g.nodes["n2"].data["eq"].detach().numpy(), g_.nodes["n2"].data["eq_ref"].detach().numpy(), decimal=1, ) ================================================ FILE: espaloma/data/tests/test_qcarchive.py ================================================ import pytest def test_import(): import espaloma.data.qcarchive_utils def test_get_graph(): from espaloma.data import qcarchive_utils client = qcarchive_utils.get_client() collection, record_names = qcarchive_utils.get_collection(client) # The order records are received is not guaranteed, and can change if, # e.g., the underlying database ends up being replaced by a copy during a database migration. # as such we need to use a specific record name. 
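# (the record names below are entries of the default "OpenFF Full Optimization Benchmark 1" dataset requested above)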
records_names_for_testing = ['c1c2c(c(c(c1f)n3cc(c3)o)cl)n(cc(c2=o)c(=o)[o-])c4c(cc(c(n4)n)f)f-3', 'c1c2c(cc(c1f)n3ccncc3)n(cc(c2=o)c(=o)[o-])c4cc4-0'] record_name = records_names_for_testing[0] assert record_name in record_names graph = qcarchive_utils.get_graph(collection, record_name) assert graph is not None graphs = qcarchive_utils.get_graphs(collection, records_names_for_testing) assert len(graphs) == 2 assert graphs[0] is not None def test_notsupported_dataset(): from espaloma.data import qcarchive_utils name = "DBH24" collection_type = "reaction" collection, record_names = qcarchive_utils.get_collection( qcarchive_utils.get_client("ml.qcarchive.molssi.org"), collection_type, name ) record_name = record_names[0] with pytest.raises(Exception): graph = qcarchive_utils.get_graph(collection, record_name, spec_name="spec_2") def test_get_torsiondrive(): from espaloma.data import qcarchive_utils import numpy as np record_name = "[h]c1c(c(c(c([c:1]1[n:2]([c:3](=[o:4])c(=c([h])[h])[h])c([h])([h])[h])[h])[h])n(=o)=o)[h]" # example dataset name = "OpenFF Amide Torsion Set v1.0" collection_type = "torsiondrive" collection, record_names = qcarchive_utils.get_collection( qcarchive_utils.get_client(), collection_type, name ) record_info = collection.get_record(record_name, specification_name="default") ( flat_angles, xyz_in_order, energies_in_order, gradients_in_order, ) = qcarchive_utils.fetch_td_record(record_info) assert flat_angles.shape == (24,) assert energies_in_order.shape == (24,) assert gradients_in_order.shape == (24, 25, 3) assert xyz_in_order.shape == (24, 25, 3) assert np.isclose(energies_in_order[0], -722.2850260791969) assert np.all( flat_angles == np.array( [ -165, -150, -135, -120, -105, -90, -75, -60, -45, -30, -15, 0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, ] ) ) assert np.allclose( xyz_in_order[0][0], np.array([-0.66407807, -8.59922225, -0.02685972]) ) ================================================ FILE: espaloma/data/tests/test_save_and_load.py ================================================ import pytest def test_save_and_load(): import espaloma as esp g = esp.Graph("C") ds = esp.data.dataset.GraphDataset([g]) # Temporary directory will be automatically cleaned up from espaloma.data.utils import make_temp_directory with make_temp_directory() as tmpdir: import os filename = os.path.join(tmpdir, "ds") ds.save(filename) new_ds = esp.data.dataset.GraphDataset.load(filename) ================================================ FILE: espaloma/data/utils.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import random import numpy as np import pandas as pd import torch import contextlib import espaloma as esp OFFSETS = { 1: -0.500607632585, 6: -37.8302333826, 7: -54.5680045287, 8: -75.0362229210, } # ============================================================================== # UTILITY FUNCTIONS # ============================================================================== @contextlib.contextmanager def make_temp_directory(): import tempfile, shutil temp_dir = tempfile.mkdtemp() try: yield temp_dir finally: shutil.rmtree(temp_dir) def sum_offsets(elements): return sum([OFFSETS[element] for element in elements]) def from_csv(path, toolkit="rdkit", smiles_col=-1, y_cols=[-2], seed=2666): """Read csv from file.""" def _from_csv(): df = pd.read_csv(path) df_smiles = df.iloc[:, smiles_col] df_y = 
df.iloc[:, y_cols] if toolkit == "rdkit": from rdkit import Chem mols = [Chem.MolFromSmiles(smiles) for smiles in df_smiles] gs = [esp.HomogeneousGraph(mol) for mol in mols] elif toolkit == "openeye": from openeye import oechem mols = [ oechem.OESmilesToMol(oechem.OEGraphMol(), smiles) for smiles in df_smiles ] gs = [esp.HomogeneousGraph(mol) for mol in mols] ds = list(zip(gs, list(torch.tensor(df_y.values)))) random.seed(seed) random.shuffle(ds) return ds return _from_csv def normalize(ds): """Get mean and std.""" gs, ys = tuple(zip(*ds)) y_mean = np.mean(ys) y_std = np.std(ys) def norm(y): return (y - y_mean) / y_std def unnorm(y): return y * y_std + y_mean return y_mean, y_std, norm, unnorm def split(ds, partition): """Split the dataset according to some partition.""" n_data = len(ds) # get the actual size of partition partition = [int(n_data * x / sum(partition)) for x in partition] ds_batched = [] idx = 0 for p_size in partition: ds_batched.append(ds[idx : idx + p_size]) idx += p_size return ds_batched def batch(ds, batch_size, seed=2666): """Batch graphs and values after shuffling.""" import dgl # get the number of data n_data_points = len(ds) n_batches = n_data_points // batch_size # drop the rest random.seed(seed) random.shuffle(ds) gs, ys = tuple(zip(*ds)) gs_batched = [ dgl.batch(gs[idx * batch_size : (idx + 1) * batch_size]) for idx in range(n_batches) ] ys_batched = [ torch.stack(ys[idx * batch_size : (idx + 1) * batch_size], dim=0) for idx in range(n_batches) ] return list(zip(gs_batched, ys_batched)) def collate_fn(graphs): import dgl return esp.HomogeneousGraph(dgl.batch(graphs)) def infer_mol_from_coordinates( coordinates, species, smiles_ref=None, coordinates_unit="angstrom", ): # local import from openeye import oechem from openmm import unit from openmm.unit import Quantity if isinstance(coordinates_unit, str): coordinates_unit = getattr(unit, coordinates_unit) # make sure we have the coordinates # in the unit system coordinates = Quantity(coordinates, coordinates_unit).value_in_unit( unit.angstrom # to make openeye happy ) # initialize molecule mol = oechem.OEGraphMol() if all(isinstance(symbol, str) for symbol in species): [ mol.NewAtom(getattr(oechem, "OEElemNo_" + symbol)) for symbol in species ] elif all(isinstance(symbol, int) for symbol in species): [ mol.NewAtom( getattr( oechem, "OEElemNo_" + oechem.OEGetAtomicSymbol(symbol) ) ) for symbol in species ] else: raise RuntimeError( "The species can only be all strings or all integers." ) mol.SetCoords(coordinates.reshape([-1])) mol.SetDimension(3) oechem.OEDetermineConnectivity(mol) oechem.OEFindRingAtomsAndBonds(mol) oechem.OEPerceiveBondOrders(mol) if smiles_ref is not None: smiles_can = oechem.OECreateCanSmiString(mol) ims = oechem.oemolistream() ims.SetFormat(oechem.OEFormat_SMI) ims.openstring(smiles_ref) mol_ref = next(ims.GetOEMols()) smiles_ref = oechem.OECreateCanSmiString(mol_ref) assert ( smiles_ref == smiles_can ), "SMILES different. Input is %s, ref is %s" % ( smiles_can, smiles_ref, ) from openff.toolkit.topology import Molecule _mol = Molecule.from_openeye(mol, allow_undefined_stereo=True) g = esp.Graph(_mol) return g ================================================ FILE: espaloma/graphs/__init__.py ================================================ """The basic data structure of espaloma---`Graph`---represents a molecular system and provides access to `dgl.DGLHeteroGraph` and `openff.toolkit.topology.Molecule`. """ from . 
import deploy, utils from .legacy_force_field import * ================================================ FILE: espaloma/graphs/deploy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import rdkit import torch from openff.toolkit.typing.engines.smirnoff import ForceField import espaloma as esp from openmm import unit from openmm.unit import Quantity import math # ============================================================================= # CONSTANTS # ============================================================================= OPENMM_LENGTH_UNIT = unit.nanometer OPENMM_ANGLE_UNIT = unit.radian OPENMM_ENERGY_UNIT = unit.kilojoule_per_mole OPENMM_BOND_EQ_UNIT = OPENMM_LENGTH_UNIT OPENMM_ANGLE_EQ_UNIT = OPENMM_ANGLE_UNIT OPENMM_TORSION_K_UNIT = OPENMM_ENERGY_UNIT OPENMM_TORSION_PHASE_UNIT = OPENMM_ANGLE_UNIT OPENMM_BOND_K_UNIT = OPENMM_ENERGY_UNIT / (OPENMM_LENGTH_UNIT**2) OPENMM_ANGLE_K_UNIT = OPENMM_ENERGY_UNIT / (OPENMM_ANGLE_UNIT**2) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def load_forcefield(forcefield="openff_unconstrained-2.2.1"): # get a forcefield try: ff = ForceField("%s.offxml" % forcefield) except Exception as e: print(e) raise NotImplementedError return ff def openmm_system_from_graph( g, forcefield="openff_unconstrained-2.1.1", suffix="", charge_method="nn", create_system_kwargs={}, ): """Construct an OpenMM system from `espaloma.Graph`. Parameters ---------- g : `espaloma.Graph` Input graph. forcefield : `str`, optional, default='openff_unconstrained-2.1.1' Name of the force field. Must be an Open Force Field (OFFXML) force field. (this forcefield will be used to assign nonbonded parameters, but all of its valence parameters will be overwritten) suffix : `str` Suffix for the force terms. charge_method : str, optional, default='nn' Method to use for assigning partial charges: 'nn' : Assign partial charges from the espaloma graph net model 'am1-bcc' : Allow the OpenFF toolkit to assign AM1-BCC charges using default backend 'gasteiger' : Assign Gasteiger partial charges (not recommended) 'from-molecule' : Use partial charges provided in the original `Molecule` object Returns ------- sys : `openmm.System` Constructed single-molecule OpenMM system. 
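Examples
--------
A minimal sketch (here ``espaloma_model`` stands in for a trained espaloma
model that writes the parameters this function reads, such as ``k``, ``eq``,
and ``q``; it is not defined in this module):

>>> g = esp.Graph("CCO")
>>> espaloma_model(g.heterograph)
>>> system = openmm_system_from_graph(g, charge_method="nn")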
""" ff = load_forcefield(forcefield) # get the mapping between position and indices bond_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n2"].data["idxs"]) } angle_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n3"].data["idxs"]) } if charge_method == "gasteiger": # from rdkit.Chem.AllChem import ComputeGasteigerCharges # rdkit_mol = g.mol.to_rdkit() # ComputeGasteigerCharges(rdkit_mol) # charges = [atom.GetDoubleProp("_GasteigerCharge") for atom in rdkit_mol.GetAtoms()] g.mol.assign_partial_charges("gasteiger") sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol] ) elif charge_method == "am1-bcc": g.mol.assign_partial_charges("am1bcc") sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol] ) elif charge_method == "from-molecule": sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol] ) elif charge_method == "nn": g.mol.partial_charges = unit.elementary_charge * g.nodes["n1"].data[ "q" ].flatten().detach().cpu().numpy().astype( np.float64, ) sys = ff.create_openmm_system( g.mol.to_topology(), charge_from_molecules=[g.mol], allow_nonintegral_charges=True, ) else: # create openmm system raise RuntimeError( "Charge method %s is not supported. " % charge_method ) for force in sys.getForces(): name = force.__class__.__name__ if "HarmonicBondForce" in name: assert force.getNumBonds() * 2 == g.heterograph.number_of_nodes( "n2" ) for idx in range(force.getNumBonds()): idx0, idx1, eq, k = force.getBondParameters(idx) position = bond_lookup[(idx0, idx1)] _eq = ( g.nodes["n2"] .data["eq%s" % suffix][position] .detach() .numpy() .item() ) _k = ( g.nodes["n2"] .data["k%s" % suffix][position] .detach() .numpy() .item() ) _eq = Quantity( # bond length _eq, esp.units.DISTANCE_UNIT, ).value_in_unit(OPENMM_BOND_EQ_UNIT) _k = Quantity( # bond force constant: # since everything is enumerated twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 _k, esp.units.FORCE_CONSTANT_UNIT, ).value_in_unit(OPENMM_BOND_K_UNIT) force.setBondParameters(idx, idx0, idx1, _eq, _k) if "HarmonicAngleForce" in name: assert force.getNumAngles() * 2 == g.heterograph.number_of_nodes( "n3" ) for idx in range(force.getNumAngles()): idx0, idx1, idx2, eq, k = force.getAngleParameters(idx) position = angle_lookup[(idx0, idx1, idx2)] _eq = ( g.nodes["n3"] .data["eq%s" % suffix][position] .detach() .numpy() .item() ) _k = ( g.nodes["n3"] .data["k%s" % suffix][position] .detach() .numpy() .item() ) _eq = Quantity( _eq, esp.units.ANGLE_UNIT, ).value_in_unit(OPENMM_ANGLE_EQ_UNIT) _k = Quantity( # force constant # since everything is enumerated twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 _k, esp.units.ANGLE_FORCE_CONSTANT_UNIT, ).value_in_unit(OPENMM_ANGLE_K_UNIT) force.setAngleParameters(idx, idx0, idx1, idx2, _eq, _k) if "PeriodicTorsionForce" in name: number_of_torsions = force.getNumTorsions() if ( "periodicity%s" % suffix not in g.nodes["n4"].data or "phase%s" % suffix not in g.nodes["n4"].data ): g.nodes["n4"].data["periodicity%s" % suffix] = torch.arange( 1, 7 )[None, :].repeat(g.heterograph.number_of_nodes("n4"), 1) g.nodes["n4"].data["phases%s" % suffix] = torch.zeros( g.heterograph.number_of_nodes("n4"), 6 ) g.nodes["n4_improper"].data[ "periodicity%s" % suffix ] = torch.arange(1, 7)[None, :].repeat( g.heterograph.number_of_nodes("n4_improper"), 1 ) g.nodes["n4_improper"].data[ "phases%s" % suffix ] = torch.zeros( 
g.heterograph.number_of_nodes("n4_improper"), 6 ) count_idx = 0 for idx in range(g.heterograph.number_of_nodes("n4")): idx0 = g.nodes["n4"].data["idxs"][idx, 0].item() idx1 = g.nodes["n4"].data["idxs"][idx, 1].item() idx2 = g.nodes["n4"].data["idxs"][idx, 2].item() idx3 = g.nodes["n4"].data["idxs"][idx, 3].item() # assuming both (a,b,c,d) and (d,c,b,a) are listed for every torsion, only pick one of the orderings if idx0 < idx3: periodicities = g.nodes["n4"].data[ "periodicity%s" % suffix ][idx] phases = g.nodes["n4"].data["phases%s" % suffix][idx] ks = g.nodes["n4"].data["k%s" % suffix][idx] for sub_idx in range(ks.flatten().shape[0]): k = ks[sub_idx].item() if k != 0.0: _periodicity = periodicities[sub_idx].item() _phase = phases[sub_idx].item() if k < 0: k = -k _phase = math.pi - _phase k = Quantity( k, esp.units.ENERGY_UNIT, ).value_in_unit( OPENMM_ENERGY_UNIT, ) if count_idx < number_of_torsions: force.setTorsionParameters( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 count_idx, idx0, idx1, idx2, idx3, _periodicity, _phase, k, ) else: force.addTorsion( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 idx0, idx1, idx2, idx3, _periodicity, _phase, k, ) count_idx += 1 if "k%s" % suffix in g.nodes["n4_improper"].data: for idx in range( g.heterograph.number_of_nodes("n4_improper") ): idx0 = g.nodes["n4_improper"].data["idxs"][idx, 0].item() idx1 = g.nodes["n4_improper"].data["idxs"][idx, 1].item() idx2 = g.nodes["n4_improper"].data["idxs"][idx, 2].item() idx3 = g.nodes["n4_improper"].data["idxs"][idx, 3].item() periodicities = g.nodes["n4_improper"].data[ "periodicity%s" % suffix ][idx] phases = g.nodes["n4_improper"].data["phases%s" % suffix][ idx ] ks = g.nodes["n4_improper"].data["k%s" % suffix][idx] for sub_idx in range(ks.flatten().shape[0]): k = ks[sub_idx].item() if k != 0.0: _periodicity = periodicities[sub_idx].item() _phase = phases[sub_idx].item() if k < 0: k = -k _phase = math.pi - _phase k = Quantity( k, esp.units.ENERGY_UNIT, ).value_in_unit( OPENMM_ENERGY_UNIT, ) if count_idx < number_of_torsions: force.setTorsionParameters( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 count_idx, idx0, idx1, idx2, idx3, _periodicity, _phase, 0.5 * k, ) else: force.addTorsion( # since everything is enumerated # twice in espaloma # and once in OpenMM, # we insert a coefficient of 2.0 idx0, idx1, idx2, idx3, _periodicity, _phase, 0.5 * k, ) count_idx += 1 return sys ================================================ FILE: espaloma/graphs/graph.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import io import openff.toolkit import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class BaseGraph(abc.ABC): """Base class of graph.""" def __init__(self): super(BaseGraph, self).__init__() class Graph(BaseGraph): """A unified graph object that support translation to and from message-passing graphs and MM factor graph. Methods ------- save(path) Save graph to file. load(path) Load a graph from path. Note ---- This object provides access to popular attributes of homograph and heterograph. 
This object also provides access to `ndata` and `edata` from the heterograph. Examples -------- >>> g0 = esp.Graph("C") >>> g1 = esp.Graph(Molecule.from_smiles("C")) >>> assert g0 == g1 """ def __init__(self, mol=None, homograph=None, heterograph=None): # TODO : more pythonic way allow multiple constructors: # Graph.from_smiles(...), Graph.from_mol(...), Graph.from_homograph(...), ... # rather than Graph(mol=None, homograph=None, ...) # input molecule if isinstance(mol, str): from openff.toolkit.topology import Molecule mol = Molecule.from_smiles(mol, allow_undefined_stereo=True) if mol is not None and homograph is None and heterograph is None: homograph = self.get_homograph_from_mol(mol) if homograph is not None and heterograph is None: heterograph = self.get_heterograph_from_graph_and_mol( homograph, mol ) self.mol = mol self.homograph = homograph self.heterograph = heterograph def save(self, path): import os import json import dgl os.mkdir(path) dgl.save_graphs(path + "/homograph.bin", [self.homograph]) dgl.save_graphs(path + "/heterograph.bin", [self.heterograph]) with open(path + "/mol.json", "w") as f_handle: json.dump(self.mol.to_json(), f_handle) @classmethod def load(cls, path): import json import dgl homograph = dgl.load_graphs(path + "/homograph.bin")[0][0] heterograph = dgl.load_graphs(path + "/heterograph.bin")[0][0] with open(path + "/mol.json", "r") as f_handle: mol = json.load(f_handle) from openff.toolkit.topology import Molecule # With OFF toolkit >=0.11, from_json requires the "hierarchy_schemes" key # which is not created with previous toolkit versions. That means, from_json # errors out when loading molecules that were json serialized with older # toolkit versions. try: mol = Molecule.from_json(mol) except KeyError: # this probably means hierarchy_schemes key wasn't found mol_dict = json.load(io.StringIO(mol)) if "hierarchy_schemes" not in mol_dict.keys(): mol_dict["hierarchy_schemes"] = dict() # Default to empty dict if not present mol = Molecule.from_dict(mol_dict) g = cls(mol=mol, homograph=homograph, heterograph=heterograph) return g @staticmethod def get_homograph_from_mol(mol): assert isinstance( mol, openff.toolkit.topology.Molecule ), "mol can only be OFF Molecule object." # TODO: # rewrite this using OFF-generic grammar # graph = esp.graphs.utils.read_homogeneous_graph.from_rdkit_mol( # mol.to_rdkit() # ) graph = ( esp.graphs.utils.read_homogeneous_graph.from_openff_toolkit_mol( mol ) ) return graph @staticmethod def get_heterograph_from_graph_and_mol(graph, mol): import dgl assert isinstance( graph, dgl.DGLGraph ), "graph can only be dgl Graph object." 
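        # delegate to the heterogeneous-graph reader, which enumerates
        # n2 (bond), n3 (angle), n4 (proper torsion), and n4_improper
        # hypernodes and their membership relations from the molecule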
heterograph = esp.graphs.utils.read_heterogeneous_graph.from_homogeneous_and_mol( graph, mol ) return heterograph # # @property # def mol(self): # return self._mol # # @property # def homograph(self): # return self._homograph # # @property # def heterograph(self): # return self._heterograph @property def ndata(self): return self.homograph.ndata @property def edata(self): return self.homograph.edata @property def nodes(self): return self.heterograph.nodes ================================================ FILE: espaloma/graphs/legacy_force_field.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import rdkit import torch from openff.toolkit import Molecule import espaloma as esp from openmmforcefields.generators import SystemGenerator import openmm from openmm import unit from openmm.app import Simulation from openmm.unit import Quantity # ============================================================================= # CONSTANTS # ============================================================================= REDUNDANT_TYPES = { "cd": "cc", "cf": "ce", "cq": "cp", "pd": "pc", "pf": "pe", "nd": "nc", } # simulation specs TEMPERATURE = 350 * unit.kelvin STEP_SIZE = 1.0 * unit.femtosecond COLLISION_RATE = 1.0 / unit.picosecond EPSILON_MIN = 0.05 * unit.kilojoules_per_mole # ============================================================================= # MODULE CLASSES # ============================================================================= class LegacyForceField: """Class to hold legacy forcefield for typing and parameter assignment. Parameters ---------- forcefield : string name and version of the forcefield. Methods ------- parametrize() Parametrize a molecular system. typing() Provide legacy typing for a molecular system. 
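    Examples
    --------
    A minimal sketch, mirroring the usage in the test suite::

        >>> import espaloma as esp
        >>> ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0")
        >>> g = esp.Graph("CCCC")
        >>> g = ff.parametrize(g)  # writes eq_ref, k_ref, ... onto the graph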
""" def __init__(self, forcefield="gaff-1.81"): self.forcefield = forcefield self._prepare_forcefield() @staticmethod def _convert_to_off(mol): if isinstance(mol, esp.Graph): return mol.mol elif isinstance(mol, Molecule): return mol elif isinstance(mol, rdkit.Chem.rdchem.Mol): return Molecule.from_rdkit(mol) elif "openeye" in str( type(mol) ): # because we don't want to depend on OE return Molecule.from_openeye(mol) def _prepare_forcefield(self): if "gaff" in self.forcefield: self._prepare_gaff() elif "smirnoff" in self.forcefield: # do nothing for now self._prepare_smirnoff() elif "openff" in self.forcefield: self._prepare_openff() else: raise NotImplementedError def _prepare_openff(self): from openff.toolkit import ForceField self.FF = ForceField("%s.offxml" % self.forcefield) def _prepare_smirnoff(self): from openff.toolkit import ForceField self.FF = ForceField("%s.offxml" % self.forcefield) def _prepare_gaff(self): import os import xml.etree.ElementTree as ET import openmmforcefields # get the openff.toolkits path openmmforcefields_path = os.path.dirname(openmmforcefields.__file__) # get the xml path ffxml_path = ( openmmforcefields_path + "/ffxml/amber/gaff/ffxml/" + self.forcefield + ".xml" ) # parse xml tree = ET.parse(ffxml_path) root = tree.getroot() nonbonded = root.find("NonbondedForce") atom_types = [atom.get("class") for atom in nonbonded.findall("Atom")] # remove redundant types [atom_types.remove(bad_type) for bad_type in REDUNDANT_TYPES.keys()] # compose the translation dictionaries str_2_idx = dict(zip(atom_types, range(len(atom_types)))) idx_2_str = dict(zip(range(len(atom_types)), atom_types)) # provide mapping for redundant types for bad_type, good_type in REDUNDANT_TYPES.items(): str_2_idx[bad_type] = str_2_idx[good_type] # make translation dictionaries attributes of self self._str_2_idx = str_2_idx self._idx_2_str = idx_2_str def _type_gaff(self, g): """Type a molecular graph using gaff force fields.""" # assert the forcefield is indeed of gaff family assert "gaff" in self.forcefield # make sure mol is in openff.toolkit format ` mol = g.mol # import template generator from openmmforcefields.generators import GAFFTemplateGenerator gaff = GAFFTemplateGenerator( molecules=mol, forcefield=self.forcefield ) # create temporary directory for running antechamber import os import shutil import tempfile tempdir = tempfile.mkdtemp() prefix = "molecule" input_sdf_filename = os.path.join(tempdir, prefix + ".sdf") gaff_mol2_filename = os.path.join(tempdir, prefix + ".gaff.mol2") frcmod_filename = os.path.join(tempdir, prefix + ".frcmod") # write sdf for input mol.to_file(input_sdf_filename, file_format="sdf") # run antechamber gaff._run_antechamber( molecule_filename=input_sdf_filename, input_format="mdl", gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename, ) gaff._read_gaff_atom_types_from_mol2(gaff_mol2_filename, mol) gaff_types = [atom.gaff_type for atom in mol.atoms] shutil.rmtree(tempdir) # put types into graph object if g is None: g = esp.Graph(mol) g.nodes["n1"].data["legacy_typing"] = torch.tensor( [self._str_2_idx[atom] for atom in gaff_types] ) return g def _parametrize_gaff(self, g, n_max_phases=6): from openmmforcefields.generators import SystemGenerator # define a system generator system_generator = SystemGenerator( small_molecule_forcefield=self.forcefield, ) mol = g.mol # mol.assign_partial_charges("formal_charge") # create system sys = system_generator.create_system( topology=mol.to_topology().to_openmm(), molecules=mol, ) bond_lookup = { 
tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n2"].data["idxs"]) } angle_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n3"].data["idxs"]) } torsion_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate(g.nodes["n4"].data["idxs"]) } improper_lookup = { tuple(idxs.detach().numpy()): position for position, idxs in enumerate( g.nodes["n4_improper"].data["idxs"] ) } torsion_phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) torsion_periodicities = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) torsion_ks = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) improper_phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) improper_periodicities = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) improper_ks = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) for force in sys.getForces(): name = force.__class__.__name__ if "HarmonicBondForce" in name: assert ( force.getNumBonds() * 2 == g.heterograph.number_of_nodes("n2") ) g.nodes["n2"].data["eq_ref"] = torch.zeros( force.getNumBonds() * 2, 1 ) g.nodes["n2"].data["k_ref"] = torch.zeros( force.getNumBonds() * 2, 1 ) for idx in range(force.getNumBonds()): idx0, idx1, eq, k = force.getBondParameters(idx) position = bond_lookup[(idx0, idx1)] g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) position = bond_lookup[(idx1, idx0)] g.nodes["n2"].data["eq_ref"][position] = eq.value_in_unit( esp.units.DISTANCE_UNIT, ) g.nodes["n2"].data["k_ref"][position] = k.value_in_unit( esp.units.FORCE_CONSTANT_UNIT, ) if "HarmonicAngleForce" in name: assert ( force.getNumAngles() * 2 == g.heterograph.number_of_nodes("n3") ) g.nodes["n3"].data["eq_ref"] = torch.zeros( force.getNumAngles() * 2, 1 ) g.nodes["n3"].data["k_ref"] = torch.zeros( force.getNumAngles() * 2, 1 ) for idx in range(force.getNumAngles()): idx0, idx1, idx2, eq, k = force.getAngleParameters(idx) position = angle_lookup[(idx0, idx1, idx2)] g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) position = angle_lookup[(idx2, idx1, idx0)] g.nodes["n3"].data["eq_ref"][position] = eq.value_in_unit( esp.units.ANGLE_UNIT, ) g.nodes["n3"].data["k_ref"][position] = k.value_in_unit( esp.units.ANGLE_FORCE_CONSTANT_UNIT, ) if "PeriodicTorsionForce" in name: for idx in range(force.getNumTorsions()): ( idx0, idx1, idx2, idx3, periodicity, phase, k, ) = force.getTorsionParameters(idx) if (idx0, idx1, idx2, idx3) in torsion_lookup: position = torsion_lookup[(idx0, idx1, idx2, idx3)] for sub_idx in range(n_max_phases): if torsion_ks[position, sub_idx] == 0: torsion_ks[ position, sub_idx ] = 0.5 * k.value_in_unit( esp.units.ENERGY_UNIT ) torsion_phases[ position, sub_idx ] = phase.value_in_unit(esp.units.ANGLE_UNIT) torsion_periodicities[ position, sub_idx ] = periodicity position = torsion_lookup[ (idx3, idx2, idx1, idx0) ] torsion_ks[ position, sub_idx ] = 0.5 * k.value_in_unit( esp.units.ENERGY_UNIT ) torsion_phases[ position, sub_idx ] = phase.value_in_unit(esp.units.ANGLE_UNIT) torsion_periodicities[ position, sub_idx ] = periodicity break g.heterograph.apply_nodes( lambda nodes: { "k_ref": torsion_ks, "periodicity_ref": torsion_periodicities, "phases_ref": 
torsion_phases, }, ntype="n4", ) """ g.heterograph.apply_nodes( lambda nodes: { "k_ref": improper_ks, "periodicity_ref": improper_periodicities, "phases_ref": improper_phases, }, ntype="n4_improper" ) """ """ def apply_torsion(node, n_max_phases=6): phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) periodicity = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) k = torch.zeros(g.heterograph.number_of_nodes("n4"), n_max_phases,) for idx in range(g.heterograph.number_of_nodes("n4")): idxs = tuple(node.data["idxs"][idx].numpy()) if idxs in force: _force = force[idxs] for sub_idx in range(len(_force.periodicity)): if hasattr(_force, "k%s" % sub_idx): k[idx, sub_idx] = getattr( _force, "k%s" % sub_idx ).value_in_unit(esp.units.ENERGY_UNIT) phases[idx, sub_idx] = getattr( _force, "phase%s" % sub_idx ).value_in_unit(esp.units.ANGLE_UNIT) periodicity[idx, sub_idx] = getattr( _force, "periodicity%s" % sub_idx ) return { "k_ref": k, "periodicity_ref": periodicity, "phases_ref": phases, } g.heterograph.apply_nodes(apply_torsion, ntype="n4") """ return g def _parametrize_smirnoff(self, g): from openff.units import unit as openff_unit OPENFF_FORCE_CONSTANT_UNIT = openff_unit forces = self.FF.label_molecules(g.mol.to_topology())[0] g.heterograph.apply_nodes( lambda node: { "k_ref": 2.0 * torch.Tensor( [ forces["Bonds"][ tuple(node.data["idxs"][idx].numpy()) ].k.to_openmm().value_in_unit(esp.units.FORCE_CONSTANT_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n2", ) g.heterograph.apply_nodes( lambda node: { "eq_ref": torch.Tensor( [ forces["Bonds"][ tuple(node.data["idxs"][idx].numpy()) ].length.to_openmm().value_in_unit(esp.units.DISTANCE_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n2", ) g.heterograph.apply_nodes( lambda node: { "k_ref": 2.0 * torch.Tensor( # OpenFF records 1/2k as param [ forces["Angles"][ tuple(node.data["idxs"][idx].numpy()) ].k.to_openmm().value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n3", ) g.heterograph.apply_nodes( lambda node: { "eq_ref": torch.Tensor( [ forces["Angles"][ tuple(node.data["idxs"][idx].numpy()) ].angle.to_openmm().value_in_unit(esp.units.ANGLE_UNIT) for idx in range(node.data["idxs"].shape[0]) ] )[:, None] }, ntype="n3", ) g.heterograph.apply_nodes( lambda node: { "epsilon_ref": torch.Tensor( [ forces["vdW"][(idx,)].epsilon.to_openmm().value_in_unit( esp.units.ENERGY_UNIT ) for idx in range(g.heterograph.number_of_nodes("n1")) ] )[:, None] }, ntype="n1", ) g.heterograph.apply_nodes( lambda node: { "sigma_ref": torch.Tensor( [ forces["vdW"][(idx,)].rmin_half.to_openmm().value_in_unit( esp.units.DISTANCE_UNIT ) for idx in range(g.heterograph.number_of_nodes("n1")) ] )[:, None] }, ntype="n1", ) def apply_torsion(node, n_max_phases=6): phases = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) periodicity = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) k = torch.zeros( g.heterograph.number_of_nodes("n4"), n_max_phases, ) force = forces["ProperTorsions"] for idx in range(g.heterograph.number_of_nodes("n4")): idxs = tuple(node.data["idxs"][idx].numpy()) if idxs in force: _force = force[idxs] for sub_idx in range(len(_force.periodicity)): if hasattr(_force, "k%s" % sub_idx): k[idx, sub_idx] = getattr( _force, "k%s" % sub_idx ).to_openmm().value_in_unit(esp.units.ENERGY_UNIT) phases[idx, sub_idx] = getattr( _force, "phase%s" % sub_idx 
).to_openmm().value_in_unit(esp.units.ANGLE_UNIT) periodicity[idx, sub_idx] = getattr( _force, "periodicity%s" % sub_idx ) return { "k_ref": k, "periodicity_ref": periodicity, "phases_ref": phases, } def apply_improper_torsion(node, n_max_phases=6): phases = torch.zeros( g.heterograph.number_of_nodes("n4_improper"), n_max_phases, ) periodicity = torch.zeros( g.heterograph.number_of_nodes("n4_improper"), n_max_phases, ) k = torch.zeros( g.heterograph.number_of_nodes("n4_improper"), n_max_phases, ) force = forces["ImproperTorsions"] for idx in range(g.heterograph.number_of_nodes("n4_improper")): idxs = tuple(node.data["idxs"][idx].numpy()) if idxs in force: _force = force[idxs] for sub_idx in range(len(_force.periodicity)): if hasattr(_force, "k%s" % sub_idx): k[idx, sub_idx] = getattr( _force, "k%s" % sub_idx ).to_openmm().value_in_unit(esp.units.ENERGY_UNIT) phases[idx, sub_idx] = getattr( _force, "phase%s" % sub_idx ).to_openmm().value_in_unit(esp.units.ANGLE_UNIT) periodicity[idx, sub_idx] = getattr( _force, "periodicity%s" % sub_idx ) return { "k_ref": k, "periodicity_ref": periodicity, "phases_ref": phases, } g.heterograph.apply_nodes(apply_torsion, ntype="n4") g.heterograph.apply_nodes(apply_improper_torsion, ntype="n4_improper") return g def baseline_energy(self, g, suffix=None): if suffix is None: suffix = "_" + self.forcefield from openmmforcefields.generators import SystemGenerator # define a system generator system_generator = SystemGenerator( small_molecule_forcefield=self.forcefield, ) mol = g.mol # mol.assign_partial_charges("formal_charge") # create system system = system_generator.create_system( topology=mol.to_topology().to_openmm(), molecules=mol, ) # parameterize topology topology = g.mol.to_topology().to_openmm() integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # create simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) us = [] xs = ( Quantity( g.nodes["n1"].data["xyz"].detach().numpy(), esp.units.DISTANCE_UNIT, ) .value_in_unit(unit.nanometer) .transpose((1, 0, 2)) ) for x in xs: simulation.context.setPositions(x) us.append( simulation.context.getState(getEnergy=True) .getPotentialEnergy() .value_in_unit(esp.units.ENERGY_UNIT) ) g.nodes["g"].data["u%s" % suffix] = torch.tensor(us)[None, :] return g def _multi_typing_smirnoff(self, g): # mol = self._convert_to_off(mol) forces = self.FF.label_molecules(g.mol.to_topology())[0] g.heterograph.apply_nodes( lambda node: { "legacy_typing": torch.Tensor( [ int( forces["Bonds"][ tuple(node.data["idxs"][idx].numpy()) ].id[1:] ) for idx in range(node.data["idxs"].shape[0]) ] ).long() }, ntype="n2", ) g.heterograph.apply_nodes( lambda node: { "legacy_typing": torch.Tensor( [ int( forces["Angles"][ tuple(node.data["idxs"][idx].numpy()) ].id[1:] ) for idx in range(node.data["idxs"].shape[0]) ] ).long() }, ntype="n3", ) g.heterograph.apply_nodes( lambda node: { "legacy_typing": torch.Tensor( [ int(forces["vdW"][(idx,)].id[1:]) for idx in range(g.heterograph.number_of_nodes("n1")) ] ).long() }, ntype="n1", ) return g def parametrize(self, g): """Parametrize a molecular graph.""" if "smirnoff" in self.forcefield or "openff" in self.forcefield: return self._parametrize_smirnoff(g) elif "gaff" in self.forcefield: return self._parametrize_gaff(g) else: raise NotImplementedError def typing(self, g): """Type a molecular graph.""" if "gaff" in self.forcefield: return self._type_gaff(g) else: raise NotImplementedError def multi_typing(self, g): """Type a molecular graph 
for hetero nodes.""" if "smirnoff" in self.forcefield: return self._multi_typing_smirnoff(g) else: raise NotImplementedError def __call__(self, *args, **kwargs): return self.typing(*args, **kwargs) ================================================ FILE: espaloma/graphs/tests/test_deploy.py ================================================ import openmm import urllib.request import numpy.testing as npt import espaloma as esp from openmm import unit omm_angle_unit = unit.radian omm_energy_unit = unit.kilojoule_per_mole from openmm.unit import Quantity def test_butane_charge_am1bcc(): """check that esp.graphs.deploy.openmm_system_from_graph runs without error on butane using am1-bcc charge method""" ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CCCC") g = ff.parametrize(g) esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="am1-bcc") def test_butane_charge_nn(): """check that esp.graphs.deploy.openmm_system_from_graph runs without error on butane using the nn charge method""" import torch # Download serialized espaloma model url = f'https://github.com/choderalab/espaloma/releases/download/0.3.0/espaloma-0.3.0rc1.pt' espaloma_model_filepath = f'espaloma-0.3.0rc1.pt' urllib.request.urlretrieve(url, filename=espaloma_model_filepath) # Test deployment ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CCCC") g = ff.parametrize(g) # apply a trained espaloma model to assign parameters net = torch.load(espaloma_model_filepath, map_location=torch.device('cpu')) net.eval() net(g.heterograph) esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="nn") def test_caffeine(): """Test Openmm system deployment of caffeine method using the charges from the molecule runs without error.""" ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") g = ff.parametrize(g) g.mol.assign_partial_charges("am1bcc") # Assign charges after parametrizing esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="from-molecule") def test_parameter_consistent_caffeine(): ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") g = ff.parametrize(g) system = esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="am1-bcc") forces = list(system.getForces()) openff_forces = ff.FF.label_molecules(g.mol.to_topology())[0] for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "HarmonicBondForce" in name: for _idx in range(force.getNumBonds()): start, end, eq, k_openmm = force.getBondParameters(_idx) k_openff = openff_forces["Bonds"][(start, end)].k.to_openmm() npt.assert_almost_equal( k_openmm / k_openff, 2.0, decimal=3, ) def test_energy_consistent_caffeine(): """Deploy a caffeine molecule parametrized by a traditional force field and deployed by espaloma, make sure the energies computed using espaloma and OpenMM are same or close. """ # grab a force field ff = esp.graphs.legacy_force_field.LegacyForceField("openff-1.2.0") # parametrize caffeine molecule using the parametrization ## Should there be a second test for SMIRNOFF impropers? 
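    # test strategy: deploy the parametrized graph to an OpenMM system, then
    # evaluate per-term (bond / angle / torsion) energies at random
    # coordinates with both espaloma and OpenMM and compare them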
g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") g = ff.parametrize(g) system = esp.graphs.deploy.openmm_system_from_graph(g, suffix="_ref", charge_method="am1-bcc") # compute energies using espaloma import torch g.nodes["n1"].data["xyz"] = torch.randn( g.heterograph.number_of_nodes("n1"), 1, 3 ) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph( g.heterograph, terms=["n2", "n3", "n4", "n4_improper"], suffix="_ref" ) # compute energies using OpenMM with bond, angle, and torsion breakdown forces = list(system.getForces()) energies = {} for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) # epsilons = {} # sigmas = {} # for _idx in range(force.getNumParticles()): # q, sigma, epsilon = force.getParticleParameters(_idx) # # record parameters # epsilons[_idx] = epsilon # sigmas[_idx] = sigma # force.setParticleParameters(_idx, 0., sigma, epsilon) # def sigma_combining_rule(sig1, sig2): # return (sig1 + sig2) / 2 # def eps_combining_rule(eps1, eps2): # return np.sqrt(np.abs(eps1 * eps2)) # for _idx in range(force.getNumExceptions()): # idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( # _idx) # force.setExceptionParameters( # _idx, # idx0, # idx1, # 0.0, # sigma_combining_rule(sigmas[idx0], sigmas[idx1]), # eps_combining_rule(epsilons[idx0], epsilons[idx1]) # ) # force.updateParametersInContext(_simulation.context) # create new simulation _simulation = openmm.app.Simulation( g.mol.to_topology().to_openmm(), system, openmm.VerletIntegrator(0.0), ) _simulation.context.setPositions( Quantity( g.nodes["n1"].data["xyz"][:, 0, :].numpy(), unit=esp.units.DISTANCE_UNIT, ).value_in_unit(unit.nanometer) ) for idx, force in enumerate(forces): name = force.__class__.__name__ state = _simulation.context.getState( getEnergy=True, getParameters=True, groups=2**idx, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) energies[name] = energy # test if bond energies are equal npt.assert_almost_equal( g.nodes["g"].data["u_n2_ref"].numpy(), energies["HarmonicBondForce"], decimal=3, ) # test if angle energies are equal npt.assert_almost_equal( g.nodes["g"].data["u_n3_ref"].numpy(), energies["HarmonicAngleForce"], decimal=3, ) # test if torsion energies are equal npt.assert_almost_equal( g.nodes["g"].data["u_n4_ref"].numpy() + g.nodes["g"].data["u_n4_improper_ref"].numpy(), energies["PeriodicTorsionForce"], decimal=3, ) # TODO: test that desired parameters are assigned ================================================ FILE: espaloma/graphs/tests/test_gaff_parametrize.py ================================================ import pytest import espaloma as esp def test_gaff_parametrize(): ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = esp.Graph( "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", ) ff.parametrize(g) print(g.nodes["n2"].data) print(g.nodes["n3"].data) print(g.nodes["n4"].data) print(g.nodes["n4_improper"].data) ================================================ FILE: espaloma/graphs/tests/test_graph.py ================================================ import io import json import pytest import shutil import importlib_resources import espaloma as esp def test_graph(): import espaloma as esp g = esp.Graph("c1ccccc1") print(g.heterograph) @pytest.fixture def graph(): import espaloma as esp return esp.Graph("c1ccccc1") def test_ndata_consistency(graph): import torch assert torch.equal(graph.ndata["h0"], 
graph.nodes["n1"].data["h0"]) @pytest.mark.parametrize( "molecule, charge", [ pytest.param("C", 0, id="methane"), pytest.param("[NH4+]", 1, id="Ammonium"), pytest.param("CC(=O)[O-]", -1, id="Acetate"), ], ) def test_formal_charge(molecule, charge): import espaloma as esp graph = esp.Graph(molecule) assert graph.nodes["g"].data["sum_q"].numpy()[0] == charge def test_save_and_load(graph): import tempfile with tempfile.TemporaryDirectory() as tempdir: graph.save(tempdir + "/g.esp") new_graph = esp.Graph.load(tempdir + "/g.esp") assert graph.homograph.number_of_nodes() == new_graph.homograph.number_of_nodes() assert graph.homograph.number_of_edges() == new_graph.homograph.number_of_edges() def test_load_from_older_openff(tmp_path_factory): """Tests creating a graph from a json-serialized mol with older openff-toolkit version (0.10.x) This checks that the serialized molecule doesn't have the expected hierarchy_schemes key, which will be created on the fly when loaded as a graph. This tests creates a graph with """ # Load json serialized off 0.10.6 molecule and save it in path from openff.toolkit import Molecule mol_json_path = importlib_resources.files('espaloma.data') / 'off-mol_0_10_6.json' with open(str(mol_json_path), "r") as json_file: # This loads it as a string -- seems like an off toolkit limitation mol_json_str = json.load(json_file) mol_dict = json.load(io.StringIO(mol_json_str)) assert "hierarchy_schemes" not in mol_dict, "Serialized json mol contains unexpected key." # Save json molecule in path out_esp_dir_1 = tmp_path_factory.mktemp("esp1") shutil.copy(mol_json_path, out_esp_dir_1 / "mol.json") # update dicitonary and create espaloma graph with the same molecule mol_dict["hierarchy_schemes"] = dict() off_molecule = Molecule.from_dict(mol_dict) smiles = off_molecule.to_smiles() g = esp.Graph(smiles) # Save the graph out_esp_dir_2 = tmp_path_factory.mktemp("esp2") / "esp-test" g.save(str(out_esp_dir_2)) # copy homo/hetero-graphs to original dir shutil.copy(out_esp_dir_2 / "homograph.bin", out_esp_dir_1) shutil.copy(out_esp_dir_2 / "heterograph.bin", out_esp_dir_1) # Load espaloma from original directory -- with mol serialized from off 0.10.6 esp_graph = esp.Graph.load(str(out_esp_dir_1)) assert esp_graph.mol == g.mol, f"Read molecule from esp graph, {esp_graph.mol} is not " \ f"the same as the expected molecule {off_molecule}." 
# TODO: test offmol_indices
# TODO: test relationship_indices_from_offmol


================================================
FILE: espaloma/graphs/tests/test_smirnoff.py
================================================
import pytest

import espaloma as esp


def test_smirnoff_esol_first():
    ff = esp.graphs.legacy_force_field.LegacyForceField(
        "smirnoff99Frosst-1.1.0"
    )
    g = esp.data.esol(first=1)[0]
    g = ff.parametrize(g)


# def test_smirnoff_strange_mol():
#     ff = esp.graphs.legacy_force_field.LegacyForceField("smirnoff99Frosst-1.1.0")
#     g = esp.Graph(
#         "[H]c1c(nc(n(=O)c1N([H])[H])N([H])[H])N2C(C(C(C(C2([H])[H])([H])[H])([H])[H])([H])[H])([H])[H]"
#     )
#     g = ff.parametrize(g)
#
#
# def test_multi_typing():
#     ff = esp.graphs.legacy_force_field.LegacyForceField("smirnoff99Frosst-1.1.0")
#     g = esp.data.esol(first=1)[0]
#     g = ff.multi_typing(g)


================================================
FILE: espaloma/graphs/utils/__init__.py
================================================
import espaloma.graphs.utils.read_heterogeneous_graph
import espaloma.graphs.utils.read_homogeneous_graph


================================================
FILE: espaloma/graphs/utils/offmol_indices.py
================================================
import numpy as np
from openff.toolkit.topology import Molecule


def atom_indices(offmol: Molecule) -> np.ndarray:
    return np.array([a.molecule_atom_index for a in offmol.atoms])


def bond_indices(offmol: Molecule) -> np.ndarray:
    return np.array([(b.atom1_index, b.atom2_index) for b in offmol.bonds])


def angle_indices(offmol: Molecule) -> np.ndarray:
    return np.array(
        sorted(
            [
                tuple([atom.molecule_atom_index for atom in angle])
                for angle in offmol.angles
            ]
        )
    )


def proper_torsion_indices(offmol: Molecule) -> np.ndarray:
    return np.array(
        sorted(
            [
                tuple([atom.molecule_atom_index for atom in proper])
                for proper in offmol.propers
            ]
        )
    )


def _all_improper_torsion_indices(offmol: Molecule) -> np.ndarray:
    """ "[*:1]~[*:2](~[*:3])~[*:4]" matches"""
    return np.array(
        sorted(
            [
                tuple([atom.molecule_atom_index for atom in improper])
                for improper in offmol.impropers
            ]
        )
    )


def improper_torsion_indices(
    offmol: Molecule, improper_def="espaloma"
) -> np.ndarray:
    """ "[*:1]~[X3:2](~[*:3])~[*:4]" matches
    (_all_improper_torsion_indices returns "[*:1]~[*:2](~[*:3])~[*:4]" matches)

    improper_def allows for choosing which atom will be the central atom
    in the permutations:
        smirnoff: central atom is listed first
        espaloma: central atom is listed second

    Additionally, for smirnoff, only take the subset of atoms that
    corresponds to the ccw traversal of connected atoms.

    Notes
    -----
    Motivation: offmol.impropers returns a large number of impropers, and
    we may wish to restrict this number.

    May update this filter definition based on discussion in
    https://github.com/openff.toolkit/openff.toolkit/issues/746
    """

    ## Find all atoms bound to exactly 3 other atoms
    if improper_def == "espaloma":
        ## This finds all orderings, which is what we want for the espaloma
        ## case but not for smirnoff
        improper_smarts = "[*:1]~[X3:2](~[*:3])~[*:4]"
        mol_idxs = offmol.chemical_environment_matches(improper_smarts)
        return np.array(mol_idxs)

    elif improper_def == "smirnoff":
        improper_smarts = "[*:2]~[X3:1](~[*:3])~[*:4]"
        ## For smirnoff ordering, we only want to find the unique combinations
        ## of atoms forming impropers so we can permute them the way we want
        mol_idxs = offmol.chemical_environment_matches(
            improper_smarts, unique=True
        )

        ## Get all ccw orderings
        # feels like there should be some good way to do this with itertools...
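        # (a comprehension-based sketch of the same cyclic enumeration,
        #  untested, left here as a comment for reference; it should be
        #  equivalent to the loop below:)
        #
        # idx_permuts = [
        #     (c, *(other_atoms[(i + j) % 3] for j in range(3)))
        #     for c, *other_atoms in mol_idxs
        #     for i in range(3)
        # ]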
idx_permuts = [] for c, *other_atoms in mol_idxs: for i in range(3): idx = [c] for j in range(3): idx.append(other_atoms[(i + j) % 3]) idx_permuts.append(tuple(idx)) return np.array(idx_permuts) else: raise ValueError(f"Unknown value for improper_def: {improper_def}") ================================================ FILE: espaloma/graphs/utils/read_heterogeneous_graph.py ================================================ """ Build heterogeneous graph from homogeneous ones. """ # ============================================================================= # IMPORTS # ============================================================================= import numpy as np import torch from espaloma.graphs.utils import offmol_indices from openff.toolkit.topology import Molecule from typing import Dict # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def duplicate_index_ordering(indices: np.ndarray) -> np.ndarray: """For every (a,b,c,d) add a (d,c,b,a) TODO: is there a way to avoid this duplication? >>> indices = np.array([[0, 1, 2, 3], [1, 2, 3, 4]]) >>> duplicate_index_ordering(indices) array([[0, 1, 2, 3], [1, 2, 3, 4], [3, 2, 1, 0], [4, 3, 2, 1]]) """ return np.concatenate([indices, np.flip(indices, axis=-1)], axis=0) def relationship_indices_from_offmol( offmol: Molecule, ) -> Dict[str, torch.Tensor]: """Construct a dictionary that maps node names (like "n2") to torch tensors of indices Notes ----- * introduces 2x redundant indices (including (d,c,b,a) for every (a,b,c,d)) for compatibility with later processing """ idxs = dict() idxs["n1"] = offmol_indices.atom_indices(offmol) idxs["n2"] = offmol_indices.bond_indices(offmol) idxs["n3"] = offmol_indices.angle_indices(offmol) idxs["n4"] = offmol_indices.proper_torsion_indices(offmol) idxs["n4_improper"] = offmol_indices.improper_torsion_indices(offmol) if len(idxs["n4"]) == 0: idxs["n4"] = np.empty((0, 4)) if len(idxs["n4_improper"]) == 0: idxs["n4_improper"] = np.empty((0, 4)) # TODO: enumerate indices for coupling-term nodes also # TODO: big refactor of term names from "n4" to "proper_torsion", "improper_torsion", "angle_angle_coupling", etc. # TODO (discuss with YW) : I think "n1" and "n4_improper" shouldn't be 2x redundant in current scheme # (also, unclear why we need "n2", "n3", "n4" to be 2x redundant, but that's something to consider changing later) for key in ["n2", "n3", "n4"]: idxs[key] = duplicate_index_ordering(idxs[key]) # make them all torch.Tensors for key in idxs: idxs[key] = torch.from_numpy(idxs[key]) return idxs def from_homogeneous_and_mol(g, offmol): r"""Build heterogeneous graph from homogeneous ones. Note ---- For now we name single node, two-, three, and four-, hypernodes as `n1`, `n2`, `n3`, and `n4`. These correspond to atom, bond, angle, and torsion nodes in chemical graphs. Parameters ---------- g : `espaloma.HomogeneousGraph` object the homogeneous graph to be translated. Returns ------- hg : `espaloma.HeterogeneousGraph` object the resulting heterogeneous graph. 
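    Examples
    --------
    Normally reached through the ``esp.Graph`` constructor; a direct call
    looks like this sketch::

        >>> from openff.toolkit.topology import Molecule
        >>> import espaloma as esp
        >>> offmol = Molecule.from_smiles("CC")
        >>> homograph = esp.Graph.get_homograph_from_mol(offmol)
        >>> hg = from_homogeneous_and_mol(homograph, offmol)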
""" # initialize empty dictionary hg = {} # get adjacency matrix a = g.adjacency_matrix() # get all the indices idxs = relationship_indices_from_offmol(offmol) # make them all numpy idxs = {key: value.numpy() for key, value in idxs.items()} # also include n1 idxs["n1"] = np.arange(g.number_of_nodes())[:, None] # ========================= # neighboring relationships # ========================= # NOTE: # here we only define the neighboring relationship # on atom level hg[("n1", "n1_neighbors_n1", "n1")] = idxs["n2"] # build a mapping between indices and the ordering idxs_to_ordering = {} for term in ["n1", "n2", "n3", "n4", "n4_improper"]: idxs_to_ordering[term] = { tuple(subgraph_idxs): ordering for (ordering, subgraph_idxs) in enumerate(list(idxs[term])) } # =============================================== # relationships between nodes of different levels # =============================================== # NOTE: # here we define all the possible # 'has' and 'in' relationships. # TODO: # we'll test later to see if this adds too much overhead # for small_idx in range(1, 5): for big_idx in range(small_idx + 1, 5): for pos_idx in range(big_idx - small_idx + 1): hg[ ( "n%s" % small_idx, "n%s_as_%s_in_n%s" % (small_idx, pos_idx, big_idx), "n%s" % big_idx, ) ] = np.stack( [ np.array( [ idxs_to_ordering["n%s" % small_idx][tuple(x)] for x in idxs["n%s" % big_idx][ :, pos_idx : pos_idx + small_idx ] ] ), np.arange(idxs["n%s" % big_idx].shape[0]), ], axis=1, ) hg[ ( "n%s" % big_idx, "n%s_has_%s_n%s" % (big_idx, pos_idx, small_idx), "n%s" % small_idx, ) ] = np.stack( [ np.arange(idxs["n%s" % big_idx].shape[0]), np.array( [ idxs_to_ordering["n%s" % small_idx][tuple(x)] for x in idxs["n%s" % big_idx][ :, pos_idx : pos_idx + small_idx ] ] ), ], axis=1, ) # ====================================== # nonbonded terms # ====================================== # NOTE: everything is counted twice here # nonbonded is where # $A = AA = AAA = AAAA = 0$ # make dense a_ = a.to_dense().detach().numpy() idxs["nonbonded"] = np.stack( np.where(np.equal(a_ + a_ @ a_ + a_ @ a_ @ a_, 0.0)), axis=-1, ) # onefour is the two ends of torsion # idxs["onefour"] = np.stack( # [ # idxs["n4"][:, 0], # idxs["n4"][:, 3], # ], # axis=1, # ) idxs["onefour"] = np.stack( np.where( np.equal(a_ + a_ @ a_, 0.0) * np.greater(a_ @ a_ @ a_, 0.0), ), axis=-1, ) # membership for term in ["nonbonded", "onefour"]: for pos_idx in [0, 1]: hg[(term, "%s_has_%s_n1" % (term, pos_idx), "n1")] = np.stack( [np.arange(idxs[term].shape[0]), idxs[term][:, pos_idx]], axis=-1, ) hg[("n1", "n1_as_%s_in_%s" % (pos_idx, term), term)] = np.stack( [ idxs[term][:, pos_idx], np.arange(idxs[term].shape[0]), ], axis=-1, ) # membership of n1 in n4_improper for term in ["n4_improper"]: for pos_idx in [0, 1, 2, 3]: hg[(term, "%s_has_%s_n1" % (term, pos_idx), "n1")] = np.stack( [np.arange(idxs[term].shape[0]), idxs[term][:, pos_idx]], axis=-1, ) hg[("n1", "n1_as_%s_in_%s" % (pos_idx, term), term)] = np.stack( [ idxs[term][:, pos_idx], np.arange(idxs[term].shape[0]), ], axis=-1, ) # ====================================== # relationships between nodes and graphs # ====================================== for term in [ "n1", "n2", "n3", "n4", "n4_improper", "nonbonded", "onefour", ]: hg[(term, "%s_in_g" % term, "g",)] = np.stack( [np.arange(len(idxs[term])), np.zeros(len(idxs[term]))], axis=1, ) hg[("g", "g_has_%s" % term, term)] = np.stack( [ np.zeros(len(idxs[term])), np.arange(len(idxs[term])), ], axis=1, ) import dgl hg = dgl.heterograph( {key: value.astype(np.int32).tolist() 
for key, value in hg.items()} ) hg.nodes["n1"].data["h0"] = g.ndata["h0"] hg.nodes["g"].data["sum_q"] = g.ndata["sum_q"][0].reshape(1, 1) # include indices in the nodes themselves for term in [ "n1", "n2", "n3", "n4", "n4_improper", "onefour", "nonbonded", ]: hg.nodes[term].data["idxs"] = torch.tensor(idxs[term]) return hg ================================================ FILE: espaloma/graphs/utils/read_homogeneous_graph.py ================================================ """ Build simple graph from OpenEye or RDKit molecule object. """ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def fp_oe(atom): from openeye import oechem HYBRIDIZATION_OE = { oechem.OEHybridization_sp: torch.tensor( [1, 0, 0, 0, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp2: torch.tensor( [0, 1, 0, 0, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp3: torch.tensor( [0, 0, 1, 0, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp3d: torch.tensor( [0, 0, 0, 1, 0], dtype=torch.get_default_dtype() ), oechem.OEHybridization_sp3d2: torch.tensor( [0, 0, 0, 0, 1], dtype=torch.get_default_dtype() ), oechem.OEHybridization_Unknown: torch.tensor( [0, 0, 0, 0, 0], dtype=torch.get_default_dtype() ), } return torch.cat( [ torch.tensor( [ atom.GetDegree(), # Note: Discard resonance-variant features # Issue related to https://github.com/choderalab/espaloma_charge/issues/18 # atom.GetValence(), # atom.GetExplicitValence(), # atom.GetFormalCharge(), atom.IsAromatic() * 1.0, atom.GetIsotope(), # TODO: is this a good idea? 
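                    # binary flags for membership in rings of size 3 through 8: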
oechem.OEAtomIsInRingSize(atom, 3) * 1.0, oechem.OEAtomIsInRingSize(atom, 4) * 1.0, oechem.OEAtomIsInRingSize(atom, 5) * 1.0, oechem.OEAtomIsInRingSize(atom, 6) * 1.0, oechem.OEAtomIsInRingSize(atom, 7) * 1.0, oechem.OEAtomIsInRingSize(atom, 8) * 1.0, ], dtype=torch.float32, ), HYBRIDIZATION_OE[atom.GetHyb()], ], dim=0, ) def fp_rdkit(atom): from rdkit import Chem HYBRIDIZATION_RDKIT = { Chem.rdchem.HybridizationType.SP: torch.tensor( [1, 0, 0, 0, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP2: torch.tensor( [0, 1, 0, 0, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP3: torch.tensor( [0, 0, 1, 0, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP3D: torch.tensor( [0, 0, 0, 1, 0], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.SP3D2: torch.tensor( [0, 0, 0, 0, 1], dtype=torch.get_default_dtype(), ), Chem.rdchem.HybridizationType.S: torch.tensor( [0, 0, 0, 0, 0], dtype=torch.get_default_dtype(), ), } return torch.cat( [ torch.tensor( [ atom.GetTotalDegree(), # Note: Discard resonance-variant features # Issue related to https://github.com/choderalab/espaloma_charge/issues/18 # atom.GetTotalValence(), # atom.GetExplicitValence(), # atom.GetFormalCharge(), atom.GetIsAromatic() * 1.0, atom.GetMass(), atom.IsInRingSize(3) * 1.0, atom.IsInRingSize(4) * 1.0, atom.IsInRingSize(5) * 1.0, atom.IsInRingSize(6) * 1.0, atom.IsInRingSize(7) * 1.0, atom.IsInRingSize(8) * 1.0, ], dtype=torch.get_default_dtype(), ), HYBRIDIZATION_RDKIT[atom.GetHybridization()], ], dim=0, ) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def from_openff_toolkit_mol(mol, use_fp=True): import dgl # initialize graph g = dgl.DGLGraph() # enter nodes n_atoms = mol.n_atoms g.add_nodes(n_atoms) g.ndata["type"] = torch.Tensor( [[atom.atomic_number] for atom in mol.atoms] ) total_charge = mol.total_charge.magnitude g.ndata["sum_q"] = torch.Tensor( [[total_charge] for _ in range(mol.n_atoms)] ) h_v = torch.zeros( g.ndata["type"].shape[0], 100, dtype=torch.get_default_dtype() ) h_v[ torch.arange(g.ndata["type"].shape[0]), torch.squeeze(g.ndata["type"]).long(), ] = 1.0 h_v_fp = torch.stack( [fp_rdkit(atom) for atom in mol.to_rdkit().GetAtoms()], axis=0 ) if use_fp == True: h_v = torch.cat([h_v, h_v_fp], dim=-1) # (n_atoms, 117) g.ndata["h0"] = h_v # enter bonds bonds = list(mol.bonds) bonds_begin_idxs = [bond.atom1_index for bond in bonds] bonds_end_idxs = [bond.atom2_index for bond in bonds] bonds_types = [bond.bond_order for bond in bonds] # NOTE: dgl edges are directional g.add_edges(bonds_begin_idxs, bonds_end_idxs) g.add_edges(bonds_end_idxs, bonds_begin_idxs) # g.edata["type"] = torch.Tensor(bonds_types)[:, None].repeat(2, 1) return g def from_oemol(mol, use_fp=True): import dgl # initialize graph g = dgl.DGLGraph() # enter nodes n_atoms = mol.NumAtoms() g.add_nodes(n_atoms) g.ndata["type"] = torch.Tensor( [[atom.GetAtomicNum()] for atom in mol.GetAtoms()] ) g.ndata["formal_charge"] = torch.Tensor( [[atom.GetFormalCharge()] for atom in mol.GetAtoms()] ) h_v = torch.zeros(g.ndata["type"].shape[0], 100, dtype=torch.float32) h_v[ torch.arange(g.ndata["type"].shape[0]), torch.squeeze(g.ndata["type"]).long(), ] = 1.0 h_v_fp = torch.stack([fp_oe(atom) for atom in mol.GetAtoms()], axis=0) if use_fp == True: h_v = torch.cat([h_v, h_v_fp], dim=-1) # (n_atoms, 117) g.ndata["h0"] = h_v # enter bonds bonds = 
list(mol.GetBonds()) bonds_begin_idxs = [bond.GetBgnIdx() for bond in bonds] bonds_end_idxs = [bond.GetEndIdx() for bond in bonds] bonds_types = [bond.GetOrder() for bond in bonds] # NOTE: dgl edges are directional g.add_edges(bonds_begin_idxs, bonds_end_idxs) g.add_edges(bonds_end_idxs, bonds_begin_idxs) # g.edata["type"] = torch.Tensor(bonds_types)[:, None].repeat(2, 1) return g def from_rdkit_mol(mol, use_fp=True): import dgl # initialize graph g = dgl.DGLGraph() # enter nodes n_atoms = mol.GetNumAtoms() g.add_nodes(n_atoms) g.ndata["type"] = torch.Tensor( [[atom.GetAtomicNum()] for atom in mol.GetAtoms()] ) g.ndata["formal_charge"] = torch.Tensor( [[atom.GetFormalCharge()] for atom in mol.GetAtoms()] ) h_v = torch.zeros(g.ndata["type"].shape[0], 100, dtype=torch.float32) h_v[ torch.arange(g.ndata["type"].shape[0]), torch.squeeze(g.ndata["type"]).long(), ] = 1.0 h_v_fp = torch.stack([fp_rdkit(atom) for atom in mol.GetAtoms()], axis=0) if use_fp == True: h_v = torch.cat([h_v, h_v_fp], dim=-1) # (n_atoms, 117) g.ndata["h0"] = h_v # enter bonds bonds = list(mol.GetBonds()) bonds_begin_idxs = [bond.GetBeginAtomIdx() for bond in bonds] bonds_end_idxs = [bond.GetEndAtomIdx() for bond in bonds] bonds_types = [bond.GetBondType().real for bond in bonds] # NOTE: dgl edges are directional g.add_edges(bonds_begin_idxs, bonds_end_idxs) g.add_edges(bonds_end_idxs, bonds_begin_idxs) # g.edata["type"] = torch.Tensor(bonds_types)[:, None].repeat(2, 1) return g ================================================ FILE: espaloma/graphs/utils/regenerate_impropers.py ================================================ import dgl import numpy as np import torch from .offmol_indices import improper_torsion_indices from ..graph import Graph def regenerate_impropers(g: Graph, improper_def="smirnoff"): """ Method to regenerate the improper nodes according to the specified method of permuting the impropers. Modifies the esp.Graph's heterograph in place and returns the new heterograph. NOTE: This will clear the data on all n4_improper nodes, including previously generated improper from JanossyPoolingImproper. 
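    Examples
    --------
    A short sketch::

        >>> import espaloma as esp
        >>> g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C")
        >>> g = regenerate_impropers(g, improper_def="smirnoff")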
    ## First get rid of the old nodes/edges
    hg = g.heterograph
    hg = dgl.remove_nodes(hg, hg.nodes("n4_improper"), "n4_improper")

    ## Generate new improper torsion permutations
    idxs = improper_torsion_indices(g.mol, improper_def)
    if len(idxs) == 0:
        return g

    ## Add new nodes of type n4_improper (one for each permut)
    hg = dgl.add_nodes(hg, idxs.shape[0], ntype="n4_improper")

    ## New edges b/n improper permuts and n1 nodes
    permut_ids = np.arange(idxs.shape[0])
    for i in range(4):
        n1_ids = idxs[:, i]

        # edge from improper node to n1 node
        outgoing_etype = ("n4_improper", f"n4_improper_has_{i}_n1", "n1")
        hg = dgl.add_edges(hg, permut_ids, n1_ids, etype=outgoing_etype)

        # edge from n1 to improper
        incoming_etype = ("n1", f"n1_as_{i}_in_n4_improper", "n4_improper")
        hg = dgl.add_edges(hg, n1_ids, permut_ids, etype=incoming_etype)

    ## New edges b/n improper permuts and the graph (for global pooling)
    # edge from improper node to graph
    outgoing_etype = ("n4_improper", f"n4_improper_in_g", "g")
    hg = dgl.add_edges(
        hg, permut_ids, np.zeros_like(permut_ids), etype=outgoing_etype
    )

    # edge from graph to improper nodes
    incoming_etype = ("g", "g_has_n4_improper", "n4_improper")
    hg = dgl.add_edges(
        hg, np.zeros_like(permut_ids), permut_ids, etype=incoming_etype
    )

    hg.nodes["n4_improper"].data["idxs"] = torch.tensor(idxs)
    g.heterograph = hg
    return g  # hg


================================================
FILE: espaloma/metrics.py
================================================
"""
Metrics to evaluate and train models.
"""
import abc

# =============================================================================
# IMPORTS
# =============================================================================
import torch
import numpy as np

from .units import GAS_CONSTANT


# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def center(metric, dim=1, reduction="none"):
    def _centered(input, target, metric=metric, dim=dim):
        # center input
        input = input - input.mean(dim=dim, keepdim=True)

        # center target
        target = target - target.mean(dim=dim, keepdim=True)

        if reduction == "none":
            return metric(input, target)
        else:
            return getattr(torch, reduction)(metric(input, target))

    return _centered


def boltzmann_weighted(metric, reduction="mean", temperature=300.0):
    def _weighted(input, target, metric=metric, reduction=reduction):
        _loss = metric(input, target)
        min_target, _ = torch.min(target, dim=-1, keepdims=True)
        delta_target = target - min_target
        weight_target = torch.softmax(
            -delta_target / (GAS_CONSTANT * temperature),
            dim=-1,
        )
        _loss = _loss * weight_target
        return getattr(torch, reduction)(_loss)

    return _weighted


def std(metric, weight=1.0, dim=1):
    def _std(input, target, metric=metric, weight=weight, dim=dim):
        return weight * metric(input, target).std(dim=dim).sum()

    return _std


def weighted(metric, weight, reduction="mean"):
    def _weighted(
        input, target, metric=metric, weight=weight, reduction=reduction
    ):
        _loss = metric(input, target)
        # broadcast the per-sample weight across the remaining loss
        # dimensions, then reduce the weighted loss
        for _ in range(_loss.dim() - 1):
            weight = weight.unsqueeze(-1)
        return getattr(torch, reduction)(weight * _loss)

    return _weighted


def weighted_with_key(metric, key="weight", reduction="mean"):
    def _weighted(input, target, metric=metric, key=key, reduction=reduction):
        weight = target.nodes["g"].data[key].flatten()
        _loss = metric(input, target)
        # broadcast the per-graph weight across the remaining loss
        # dimensions, then reduce the weighted loss
        for _ in range(_loss.dim() - 1):
            weight = weight.unsqueeze(-1)
        return getattr(torch, reduction)(weight * _loss)

    return _weighted


def bootstrap(metric,
n_samples=100, ci=0.95): def _bootstrap(input, target, metric=metric, n_samples=n_samples, ci=ci): original = metric(input=input, target=target) idxs_all = np.arange(input.shape[0]) results = [] for _ in range(n_samples): idxs = np.random.choice( idxs_all, len(idxs_all), replace=True, ) _metric = ( metric(input=input[idxs], target=target[idxs]) .detach() .cpu() .numpy() .item() ) results.append( _metric, ) results = np.array(results) low = np.percentile(results, 100.0 * 0.5 * (1 - ci)) high = np.percentile(results, (1 - ((1 - ci) * 0.5)) * 100.0) return original.detach().cpu().numpy().item(), low, high return _bootstrap def latex_format_ci(original, low, high): return "$%.4f_{%.4f}^{%.4f}$" % (original, low, high) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def mse(input, target): return torch.nn.functional.mse_loss(target, input) def mape(input, target): return ((input - target).abs() / target.abs()).mean() def rmse(input, target): return torch.sqrt(torch.nn.functional.mse_loss(target, input)) def mae_of_log(input, target): return torch.nn.L1Loss()(torch.log(input), torch.log(target)) def cross_entropy(input, target, reduction="mean"): loss_fn = torch.nn.CrossEntropyLoss(reduction=reduction) return loss_fn(input=input, target=target) # prediction first, logit def r2(input, target): target = target.flatten() input = input.flatten() ss_tot = (target - target.mean()).pow(2).sum() ss_res = (input - target).pow(2).sum() return 1 - torch.div(ss_res, ss_tot) def accuracy(input, target): # check if this is logit if input.dim() == 2 and input.shape[-1] > 1: input = input.argmax(dim=-1) return torch.div((input == target).sum().double(), target.shape[0]) # ============================================================================= # MODULE CLASSES # ============================================================================= class Metric(torch.nn.modules.loss._Loss): """Base function for loss.""" def __init__(self, size_average=None, reduce=None, reduction="mean"): super(Metric, self).__init__(size_average, reduce, reduction) @abc.abstractmethod def forward(self, *args, **kwargs): raise NotImplementedError class GraphMetric(Metric): """Loss between nodes attributes of graph or graphs. Parameters ---------- base_metric : callable Metric on fixed dimensional space. between : List[str] Names of quantities to compare. level : str Level of nodes to compare with. 
Returns ------- torch.Tensor """ def __init__(self, base_metric, between, level="n1", *args, **kwargs): super(GraphMetric, self).__init__(*args, **kwargs) # between could be tuple of two strings or two functions assert len(between) == 2 self.between = ( self._translation(between[0], level), self._translation(between[1], level), ) self.base_metric = base_metric # get name if hasattr(base_metric, "__name__"): base_name = base_metric.__name__ else: base_name = base_metric.__class__.__name__ self.__name__ = "%s_between_%s_and_%s_on_%s" % ( base_name, between[0], between[1], level, ) @staticmethod def _translation(string, level): return lambda g: g.nodes[level].data[string] def forward(self, g_input, g_target=None): """Forward function of loss.""" # allow loss within self if g_target is None: g_target = g_input # get input and output transform function input_fn, target_fn = self.between # compute loss using base loss # NOTE: # use keyward argument here since torch is bad with the order with args return self.base_metric( input=input_fn(g_input), target=target_fn(g_target) ) class GraphDerivativeMetric(Metric): """Loss between nodes attributes of graph or graphs.""" def __init__( self, base_metric, between, weight=1.0, level="n1", d="xyz", d_level="n1", *args, **kwargs ): super(GraphDerivativeMetric, self).__init__(*args, **kwargs) # between could be tuple of two strings or two functions assert len(between) == 2 self.between = ( self._translation(between[0], level), self._translation(between[1], level), ) self.d = self._translation(d, d_level) self.base_metric = base_metric self.weight = weight # get name if hasattr(base_metric, "__name__"): base_name = base_metric.__name__ else: base_name = base_metric.__class__.__name__ self.__name__ = "%s_between_d_%s_d_%s_and_d_%s_d_%s_on_%s" % ( base_name, between[0], d, between[1], d, level, ) @staticmethod def _translation(string, level): return lambda g: g.nodes[level].data[string] def forward(self, g_input, g_target=None): """Forward function of loss.""" # allow loss within self if g_target is None: g_target = g_input # get input and output transform function input_fn, target_fn = self.between # calculate the derivatives of input input_prime = torch.autograd.grad( input_fn(g_input).sum(), self.d(g_input), create_graph=True, retain_graph=True, allow_unused=True, )[0] target_prime = torch.autograd.grad( target_fn(g_target).sum(), self.d(g_target), create_graph=True, retain_graph=True, allow_unused=True, )[0] # compute loss using base loss # NOTE: # use keyward argument here since torch is bad with the order with args return self.weight * self.base_metric( input=input_prime, target=target_prime, ) class GraphHalfDerivativeMetric(Metric): """Loss between nodes attributes of graph or graphs.""" def __init__( self, base_metric, input_level="g", input_name="u", target_prime_level="n1", target_prime_name="u_ref_prime", d="xyz", d_level="n1", weight=1.0, *args, **kwargs ): super(GraphHalfDerivativeMetric, self).__init__(*args, **kwargs) # define query functions self.d = self._translation(d, d_level) self.input_fn = self._translation(input_name, input_level) self.target_prime_fn = self._translation( target_prime_name, target_prime_level ) self.base_metric = base_metric self.weight = weight # get name if hasattr(base_metric, "__name__"): base_name = base_metric.__name__ else: base_name = base_metric.__class__.__name__ self.__name__ = "%s_between_%s_d_%s_on_%s_and_%s_on_%s" % ( base_name, input_name, d, input_level, target_prime_name, target_prime_level, ) 
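    # usage sketch (illustrative; these are the constructor defaults written
    # out explicitly): penalize the mismatch between the gradient of the
    # predicted graph-level energy "u" w.r.t. atomic coordinates "xyz" and
    # reference derivatives stored on atoms as "u_ref_prime":
    #
    #     loss_fn = GraphHalfDerivativeMetric(
    #         base_metric=torch.nn.MSELoss(),
    #         input_level="g",
    #         input_name="u",
    #         target_prime_level="n1",
    #         target_prime_name="u_ref_prime",
    #     )
    #     loss = loss_fn(g)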
@staticmethod def _translation(string, level): return lambda g: g.nodes[level].data[string] def forward(self, g_input, g_target=None): """Forward function of loss.""" # allow loss within self if g_target is None: g_target = g_input # calculate the derivatives of input input_prime = torch.autograd.grad( self.input_fn(g_input).sum(), self.d(g_input), create_graph=True, retain_graph=True, )[0] target_prime = self.target_prime_fn(g_target) # compute loss using base loss # NOTE: # use keyword arguments here since relying on positional order is error-prone return self.weight * self.base_metric( input=input_prime, target=target_prime, ) # ============================================================================= # PRESETS # ============================================================================= class TypingCrossEntropy(GraphMetric): def __init__(self): super(TypingCrossEntropy, self).__init__( base_metric=torch.nn.CrossEntropyLoss(), between=["nn_typing", "legacy_typing"], ) self.__name__ = "TypingCrossEntropy" class TypingAccuracy(GraphMetric): def __init__(self): super(TypingAccuracy, self).__init__( base_metric=accuracy, between=["nn_typing", "legacy_typing"] ) self.__name__ = "TypingAccuracy" class BondKMSE(GraphMetric): def __init__(self): super(BondKMSE, self).__init__( between=["k_ref", "k"], level="n2", base_metric=torch.nn.MSELoss() ) self.__name__ = "BondKMSE" class BondKRMSE(GraphMetric): def __init__(self): super(BondKRMSE, self).__init__( between=["k_ref", "k"], level="n2", base_metric=rmse ) self.__name__ = "BondKRMSE" ================================================ FILE: espaloma/mm/__init__.py ================================================ from . import angle, bond, energy, functional, geometry, nonbonded, torsion ================================================ FILE: espaloma/mm/angle.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def harmonic_angle(x, k, eq): """Harmonic angle energy. Parameters ---------- x : `torch.Tensor`, `shape = (batch_size, 1)` angle value k : `torch.Tensor`, `shape = (batch_size, 1)` force constant eq : `torch.Tensor`, `shape = (batch_size, 1)` equilibrium angle Returns ------- u : `torch.Tensor`, `shape = (batch_size, 1)` energy """ # NOTE: # the factor 0.5 here combines with the 0.5 inside # esp.mm.functional.harmonic to give 0.25 per hypernode; # since every angle is enumerated twice in the graph, the # summed energy recovers the conventional 0.5 * k * (x - eq) ** 2 return 0.5 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) def linear_mixture_angle(x, coefficients, phases): """Angle energy with linear-mixture basis function. Parameters ---------- coefficients : torch.Tensor Coefficients of the linear mixture. phases : torch.Tensor Phases of the linear mixture.
""" return 0.5 * esp.mm.functional.linear_mixture( x=x, coefficients=coefficients, phases=phases ) def urey_bradley(x_between, coefficients, phases): return esp.mm.functional.linear_mixture( x=x_between, coefficients=coefficients, phases=phases, ) def bond_bond(u_left, u_right, k_bond_bond): u_left = u_left - u_left.min(dim=-1, keepdims=True)[0] u_right = u_right - u_right.min(dim=-1, keepdims=True)[0] return k_bond_bond * (u_left**0.5) * (u_right**0.5) def bond_angle( u_left, u_right, u_angle, k_bond_angle, ): u_left = u_left - u_left.min(dim=-1, keepdims=True)[0] u_right = u_right - u_right.min(dim=-1, keepdims=True)[0] u_angle = u_angle - u_angle.min(dim=-1, keepdims=True)[0] return k_bond_angle * (u_left**0.5) * ( u_angle**0.5 ) + k_bond_angle * (u_right**0.5) * (u_angle**0.5) def angle_high( u_angle, k3, k4, ): u_angle = u_angle - u_angle.min(dim=-1, keepdims=True)[0] return k3 * u_angle**1.5 + k4 * u_angle**2 ================================================ FILE: espaloma/mm/bond.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def harmonic_bond(x, k, eq): """Harmonic bond energy. Parameters ---------- x : `torch.Tensor`, `shape = (batch_size, 1)` bond length k : `torch.Tensor`, `shape = (batch_size, 1)` force constant eq : `torch.Tensor`, `shape = (batch_size, 1)` equilibrium bond length Returns ------- u : `torch.Tensor`, `shape = (batch_size, 1)` energy """ # NOTE: # the constant is included here but not in the functional forms # NOTE: # 0.25 because all bonds are calculated twice return 0.5 * esp.mm.functional.harmonic(x=x, k=k, eq=eq) def gaussian_bond(x, coefficients): """Bond energy with Gaussian basis function.""" return esp.mm.functional.gaussian( x=x, coefficients=coefficients, ) def linear_mixture_bond(x, coefficients, phases): """Bond energy with Linear basis function. Parameters ---------- coefficients : torch.Tensor Coefficients of the linear mixuture. phases : torch.Tensor Phases of the linear mixture. 
""" return 0.5 * esp.mm.functional.linear_mixture( x=x, coefficients=coefficients, phases=phases ) def bond_high(u_bond, k3, k4): u_bond = u_bond - u_bond.min(dim=-1, keepdims=True)[0] return k3 * u_bond**1.5 + k4 * u_bond**2 ================================================ FILE: espaloma/mm/energy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch import espaloma as esp # ============================================================================= # ENERGY IN HYPERNODES---BONDED # ============================================================================= def apply_bond(nodes, suffix=""): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.bond.harmonic_bond( x=nodes.data["x"], k=nodes.data["k%s" % suffix], eq=nodes.data["eq%s" % suffix], ) } # else: # return { # 'u%s' % suffix: esp.mm.bond.harmonic_bond_re( # x=nodes.data['x'], # k=nodes.data['k%s' % suffix], # eq=nodes.data['eq%s' % suffix], # ) # } def apply_angle(nodes, suffix=""): """Angle energy in nodes.""" return { "u%s" % suffix: esp.mm.angle.harmonic_angle( x=nodes.data["x"], k=nodes.data["k%s" % suffix], eq=nodes.data["eq%s" % suffix], ) } def apply_angle_ii(nodes, suffix=""): return { # "u_angle_high%s" # % suffix: esp.mm.angle.angle_high( # u_angle=nodes.data["u"], # k3=nodes.data["k3"], # k4=nodes.data["k4"], # ), "u_urey_bradley%s" % suffix: esp.mm.angle.urey_bradley( x_between=nodes.data["x_between"], coefficients=nodes.data["coefficients_urey_bradley"], phases=[0.0, 12.0], ), "u_bond_bond%s" % suffix: esp.mm.angle.bond_bond( u_left=nodes.data["u_left"], u_right=nodes.data["u_right"], k_bond_bond=nodes.data["k_bond_bond"], ), "u_bond_angle%s" % suffix: esp.mm.angle.bond_angle( u_left=nodes.data["u_left"], u_right=nodes.data["u_right"], u_angle=nodes.data["u"], k_bond_angle=nodes.data["k_bond_angle"], ), } def apply_bond_ii(nodes, suffix=""): return { "u_bond_high%s" % suffix: esp.mm.bond.bond_high( u_bond=nodes.data["u"], k3=nodes.data["k3"], k4=nodes.data["k4"], ) } def apply_torsion_ii(nodes, suffix=""): """Torsion energy in nodes.""" return { "u_angle_angle%s" % suffix: esp.mm.torsion.angle_angle( u_angle_left=nodes.data["u_angle_left"], u_angle_right=nodes.data["u_angle_right"], k_angle_angle=nodes.data["k_angle_angle"], ), "u_angle_torsion%s" % suffix: esp.mm.torsion.angle_torsion( u_angle_left=nodes.data["u_angle_left"], u_angle_right=nodes.data["u_angle_right"], u_torsion=nodes.data["u"], k_angle_torsion=nodes.data["k_angle_torsion"], ), "u_angle_angle_torsion%s" % suffix: esp.mm.torsion.angle_angle_torsion( u_angle_left=nodes.data["u_angle_left"], u_angle_right=nodes.data["u_angle_right"], u_torsion=nodes.data["u"], k_angle_angle_torsion=nodes.data["k_angle_angle_torsion"], ), "u_bond_torsion%s" % suffix: esp.mm.torsion.bond_torsion( u_bond_left=nodes.data["u_bond_left"], u_bond_right=nodes.data["u_bond_right"], u_bond_center=nodes.data["u_bond_center"], u_torsion=nodes.data["u"], k_side_torsion=nodes.data["k_side_torsion"], k_center_torsion=nodes.data["k_center_torsion"], ), } def apply_torsion(nodes, suffix=""): """Torsion energy in nodes.""" if ( "phases%s" % suffix in nodes.data and "periodicity%s" % suffix in nodes.data ): return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], phases=nodes.data["phases%s" % suffix], 
periodicity=nodes.data["periodicity%s" % suffix], ) } else: return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], ) } def apply_improper_torsion(nodes, suffix=""): """Improper torsion energy in nodes.""" if ( "phases%s" % suffix in nodes.data and "periodicity%s" % suffix in nodes.data ): return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], phases=nodes.data["phases%s" % suffix], periodicity=nodes.data["periodicity%s" % suffix], ) } else: n_multi = nodes.data["k%s" % suffix].shape[-1] periodicity=list(range(1, n_multi+1)) phases=[0.0 for _ in range(n_multi)] return { "u%s" % suffix: esp.mm.torsion.periodic_torsion( x=nodes.data["x"], k=nodes.data["k%s" % suffix], phases=phases, periodicity=periodicity, ) } def apply_bond_gaussian(nodes, suffix=""): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.bond.gaussian_bond( x=nodes.data["x"], coefficients=nodes.data["coefficients%s" % suffix], ) } def apply_bond_linear_mixture(nodes, suffix="", phases=[0.0, 1.0]): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.bond.linear_mixture_bond( x=nodes.data["x"], coefficients=nodes.data["coefficients%s" % suffix], phases=phases, ) } def apply_angle_linear_mixture(nodes, suffix="", phases=[0.0, 1.0]): """Bond energy in nodes.""" # if suffix == '_ref': return { "u%s" % suffix: esp.mm.angle.linear_mixture_angle( x=nodes.data["x"], coefficients=nodes.data["coefficients%s" % suffix], phases=phases, ) } # ============================================================================= # ENERGY IN HYPERNODES---NONBONDED # ============================================================================= def apply_nonbonded(nodes, scaling=1.0, suffix=""): """Nonbonded in nodes.""" # TODO: should this be 9-6 or 12-6? return { "u%s" % suffix: scaling * esp.mm.nonbonded.lj_12_6( x=nodes.data["x"], sigma=nodes.data["sigma%s" % suffix], epsilon=nodes.data["epsilon%s" % suffix], ) } def apply_coulomb(nodes, scaling=1.0, suffix=""): return { "u%s" % suffix: scaling * esp.mm.nonbonded.coulomb( x=nodes.data["x"], q=nodes.data["q"], ) } # ============================================================================= # ENERGY IN GRAPH # ============================================================================= def energy_in_graph( g, suffix="", terms=["n2", "n3", "n4"], ): # "onefour", "nonbonded"]): """Calculate the energy of a small molecule given parameters and geometry. Parameters ---------- g : `dgl.DGLHeteroGraph` Input graph. Returns ------- g : `dgl.DGLHeteroGraph` Output graph. Notes ----- This function modifies graphs in-place. 
""" # TODO: this is all very restricted for now # we need to make this better import dgl if "n2" in terms: # apply energy function if "coefficients%s" % suffix in g.nodes["n2"].data: g.apply_nodes( lambda node: apply_bond_linear_mixture( node, suffix=suffix, phases=[1.5, 6.0] ), ntype="n2", ) else: g.apply_nodes( lambda node: apply_bond(node, suffix=suffix), ntype="n2", ) if "n3" in terms: if "coefficients%s" % suffix in g.nodes["n3"].data: import math g.apply_nodes( lambda node: apply_angle_linear_mixture( node, suffix=suffix, phases=[0.0, math.pi] ), ntype="n3", ) else: g.apply_nodes( lambda node: apply_angle(node, suffix=suffix), ntype="n3", ) if g.number_of_nodes("n4") > 0 and "n4" in terms: g.apply_nodes( lambda node: apply_torsion(node, suffix=suffix), ntype="n4", ) if g.number_of_nodes("n4_improper") > 0 and "n4_improper" in terms: g.apply_nodes( lambda node: apply_improper_torsion(node, suffix=suffix), ntype="n4_improper", ) # if g.number_of_nodes("nonbonded") > 0 and "nonbonded" in terms: # g.apply_nodes( # lambda node: apply_nonbonded(node, suffix=suffix), # ntype="nonbonded", # ) # if g.number_of_nodes("onefour") > 0 and "onefour" in terms: # g.apply_nodes( # lambda node: apply_nonbonded( # node, # suffix=suffix, # scaling=0.5, # ), # ntype="onefour", # ) if "nonbonded" in terms or "onefour" in terms: esp.mm.nonbonded.multiply_charges(g) if "nonbonded" in terms and g.number_of_nodes("nonbonded") > 0: g.apply_nodes( lambda node: apply_coulomb( node, suffix=suffix, scaling=1.0, ), ntype="nonbonded", ) if "onefour" in terms and g.number_of_nodes("onefour") > 0: g.apply_nodes( lambda node: apply_coulomb( node, suffix=suffix, # scaling=0.5, scaling=0.8333333333333334, ), ntype="onefour", ) # sum up energy # bonded g.multi_update_all( { "%s_in_g" % term: ( dgl.function.copy_u(u="u%s" % suffix, out="m_%s" % term), dgl.function.sum( msg="m_%s" % term, out="u_%s%s" % (term, suffix) ), ) for term in terms if "u%s" % suffix in g.nodes[term].data }, cross_reducer="sum", ) g.apply_nodes( lambda node: { "u%s" % suffix: sum( node.data["u_%s%s" % (term, suffix)] for term in terms if "u_%s%s" % (term, suffix) in node.data ) }, ntype="g", ) if "u0" in g.nodes["g"].data: g.apply_nodes( lambda node: {"u": node.data["u"] + node.data["u0"]}, ntype="g", ) return g def energy_in_graph_ii( g, suffix="", ): if g.number_of_nodes("n3") > 0: g.apply_nodes( lambda node: apply_angle_ii(node, suffix=suffix), ntype="n3", ) g.apply_nodes( lambda node: { "u%s" % suffix: node.data["u%s" % suffix] + node.data["u_urey_bradley%s" % suffix] + node.data["u_bond_bond%s" % suffix] + node.data["u_bond_angle%s" % suffix] }, ntype="n3", ) if g.number_of_nodes("n4") > 0: g.apply_nodes( lambda node: apply_torsion_ii(node, suffix=suffix), ntype="n4", ) g.apply_nodes( lambda node: { "u%s" % suffix: node.data["u%s" % suffix] + node.data["u_angle_angle%s" % suffix] + node.data["u_angle_torsion%s" % suffix] + node.data["u_angle_angle_torsion%s" % suffix] + node.data["u_bond_torsion%s" % suffix] }, ntype="n4", ) return g class EnergyInGraph(torch.nn.Module): def __init__(self, *args, **kwargs): super(EnergyInGraph, self).__init__() self.args = args self.kwargs = kwargs def forward(self, g): return energy_in_graph(g, *self.args, **self.kwargs) class EnergyInGraphII(torch.nn.Module): def __init__(self, *args, **kwargs): super(EnergyInGraphII, self).__init__() self.args = args self.kwargs = kwargs def forward(self, g): return energy_in_graph_ii(g, *self.args, **self.kwargs) class CarryII(torch.nn.Module): def forward(self, g): import 
math import dgl g.multi_update_all( { "n2_as_0_in_n3": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_left"), ), "n2_as_1_in_n3": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_right"), ), "n2_as_0_in_n4": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_bond_left"), ), "n2_as_1_in_n4": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_bond_center"), ), "n2_as_2_in_n4": ( dgl.function.copy_u("u", "m_u_2"), dgl.function.sum("m_u_2", "u_bond_right"), ), "n3_as_0_in_n4": ( dgl.function.copy_u("u", "m3_u_0"), dgl.function.sum("m3_u_0", "u_angle_left"), ), "n3_as_1_in_n4": ( dgl.function.copy_u("u", "m3_u_1"), dgl.function.sum("m3_u_1", "u_angle_right"), ), }, cross_reducer="sum", ) return g ================================================ FILE: espaloma/mm/functional.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import math import torch import espaloma as esp # ============================================================================= # CONSTANTS # ============================================================================= from openmm import unit from openmm.unit import Quantity LJ_SWITCH = Quantity(1.0, unit.angstrom).value_in_unit( esp.units.DISTANCE_UNIT ) # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def linear_mixture_to_original(k1, k2, b1, b2): """Translating linear mixture coefficients back to original parameterization. """ # (batch_size, ) k = k1 + k2 # (batch_size, ) b = (k1 * b1 + k2 * b2) / (k + 1e-7) return k, b # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def harmonic(x, k, eq, order=[2]): """Harmonic term. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` k : `torch.Tensor`, `shape=(batch_size, len(order))` eq : `torch.Tensor`, `shape=(batch_size, len(order))` order : `int` or `List` of `int` Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` """ if isinstance(order, list): order = torch.tensor(order, device=x.device) return ( 0.5 * k * ((x - eq)).pow(order[:, None, None]).permute(1, 2, 0).sum(dim=-1) ) def periodic_fixed_phases( dihedrals: torch.Tensor, ks: torch.Tensor ) -> torch.Tensor: """Periodic torsion term with n_phases = 6, periodicities = 1..n_phases, phases = zeros Parameters ---------- dihedrals : torch.Tensor, shape=(n_snapshots, n_dihedrals) dihedral angles -- TODO: confirm in radians? ks : torch.Tensor, shape=(n_dihedrals, n_phases) force constants -- TODO: confirm in esp.unit.ENERGY_UNIT ? Returns ------- u : torch.Tensor, shape=(n_snapshots, 1) potential energy of each snapshot Notes ----- TODO: is there a way to annotate / type-hint tensor shapes? 
(currently adding many assert statements) TODO: merge with esp.mm.functional.periodic -- adding this because I was having difficulty debugging runtime tensor shape errors in esp.mm.functional.periodic, which allows for a more flexible mix of input shapes and types """ # periodicity = 1..n_phases n_phases = 6 periodicity = torch.arange(n_phases) + 1 # assert input shape consistency n_snapshots, n_dihedrals = dihedrals.shape n_dihedrals_, n_phases_ = ks.shape assert n_dihedrals == n_dihedrals_ assert n_phases == n_phases_ # promote everything to this shape stacked_shape = (n_snapshots, n_dihedrals, n_phases) # duplicate ks n_snapshots times ks_stacked = torch.stack([ks] * n_snapshots, dim=0) assert ks_stacked.shape == stacked_shape # duplicate dihedral angles n_phases times dihedrals_stacked = torch.stack([dihedrals] * n_phases, dim=2) assert dihedrals_stacked.shape == stacked_shape # duplicate periodicity n_snapshots * n_dihedrals times ns = torch.stack( [torch.stack([periodicity] * n_snapshots)] * n_dihedrals, dim=1 ) assert ns.shape == stacked_shape # compute k_n * cos(n * theta) for n in 1..n_phases, for each dihedral in each snapshot energy_terms = ks_stacked * torch.cos(ns * dihedrals_stacked) assert energy_terms.shape == stacked_shape # sum over n_dihedrals and n_phases energy_sums = energy_terms.sum(dim=(1, 2)) assert energy_sums.shape == (n_snapshots,) return energy_sums.reshape((n_snapshots, 1)) def periodic( x, k, periodicity=list(range(1, 7)), phases=[0.0 for _ in range(6)] ): """Periodic term. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` k : `torch.Tensor`, `shape=(batch_size, number_of_phases)` periodicity: either list of length number_of_phases, or `torch.Tensor`, `shape=(batch_size, number_of_phases)` phases : either list of length number_of_phases, or `torch.Tensor`, `shape=(batch_size, number_of_phases)` """ if isinstance(phases, list): phases = torch.tensor(phases, device=x.device) if isinstance(periodicity, list): periodicity = torch.tensor( periodicity, device=x.device, dtype=torch.get_default_dtype(), ) if periodicity.ndim == 1: periodicity = periodicity[None, None, :].repeat( x.shape[0], x.shape[1], 1 ) elif periodicity.ndim == 2: periodicity = periodicity[:, None, :].repeat(1, x.shape[1], 1) if phases.ndim == 1: phases = phases[None, None, :].repeat( x.shape[0], x.shape[1], 1, ) elif phases.ndim == 2: phases = phases[:, None, :].repeat( 1, x.shape[1], 1, ) n_theta = periodicity * x[:, :, None] n_theta_minus_phases = n_theta - phases cos_n_theta_minus_phases = n_theta_minus_phases.cos() k = k[:, None, :].repeat(1, x.shape[1], 1) # energy = (k * (1.0 + cos_n_theta_minus_phases)).sum(dim=-1) energy = ( torch.nn.functional.relu(k) * (cos_n_theta_minus_phases + 1.0) - torch.nn.functional.relu(0.0 - k) * (cos_n_theta_minus_phases - 1.0) ).sum(dim=-1) return energy # simple implementation # def harmonic(x, k, eq): # return k * (x - eq) ** 2 # # def harmonic_re(x, k, eq, a=0.0, b=0.3): # # temporary # ka = k # kb = eq # # c = ((ka * a + kb * b) / (ka + kb)) ** 2 - a ** 2 - b ** 2 # # return ka * (x - a) ** 2 + kb * (x - b) ** 2 def lj( x, epsilon, sigma, order=[12, 6], coefficients=[1.0, 1.0], switch=LJ_SWITCH, ): r"""Lennard-Jones term. 
Notes ----- ..math:: E = \epsilon ((\sigma / r) ^ {12} - (\sigma / r) ^ 6) Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` epsilon : `torch.Tensor`, `shape=(batch_size, len(order))` sigma : `torch.Tensor`, `shape=(batch_size, len(order))` order : `int` or `List` of `int` coefficients : torch.tensor or list switch : unitless switch width (distance) Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` """ if isinstance(order, list): order = torch.tensor(order, device=x.device) if isinstance(coefficients, list): coefficients = torch.tensor(coefficients, device=x.device) assert order.shape[0] == 2 assert order.dim() == 1 # TODO: # for experiments only # erase later # compute sigma over x sigma_over_x = sigma / x # erase values under switch sigma_over_x = torch.where( torch.lt(x, switch), torch.zeros_like(sigma_over_x), sigma_over_x, ) return epsilon * ( coefficients[0] * sigma_over_x ** order[0] - coefficients[1] * sigma_over_x ** order[1] ) def gaussian(x, coefficients, phases=[idx * 0.001 for idx in range(200)]): r"""Gaussian basis function. Parameters ---------- x : torch.Tensor coefficients : list or torch.Tensor of length n_phases phases : list or torch.Tensor of length n_phases """ if isinstance(phases, list): # (number_of_phases, ) phases = torch.tensor(phases, device=x.device) # broadcasting # (number_of_hypernodes, number_of_snapshots, number_of_phases) phases = phases[None, None, :].repeat(x.shape[0], x.shape[1], 1) x = x[:, :, None].repeat(1, 1, phases.shape[-1]) coefficients = coefficients[:, None, :].repeat(1, x.shape[1], 1) return (coefficients * torch.exp(-0.5 * (x - phases) ** 2)).sum(-1) def linear_mixture(x, coefficients, phases=[0.0, 1.0]): r"""Linear mixture basis function. x : torch.Tensor coefficients : list or torch.Tensor of length 2 phases : list of length 2 """ assert len(phases) == 2, "Only two phases now." 
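# NOTE (added commentary): the two phases b1, b2 below act as fixed reference
# points, and the (typically learned) coefficients k1, k2 weight a harmonic
# well centered at each; up to an additive constant the sum is itself a
# harmonic with force constant k1 + k2 and equilibrium
# (k1 * b1 + k2 * b2) / (k1 + k2), which is what linear_mixture_to_original
# above recovers.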
assert coefficients.shape[-1] == 2 # partition the dimensions # (, ) b1 = phases[0] b2 = phases[1] # (batch_size, 1) k1 = coefficients[:, 0][:, None] k2 = coefficients[:, 1][:, None] # get the original parameters # (batch_size, ) # k, b = linear_mixture_to_original(k1, k2, b1, b2) # (batch_size, 1) u1 = k1 * (x - b1) ** 2 u2 = k2 * (x - b2) ** 2 u = 0.5 * (u1 + u2) # - k1 * b1 ** 2 - k2 * b2 ** 2 + b ** 2 return u def harmonic_periodic_coupled( x_harmonic, x_periodic, k, eq, periodicity=list(range(1, 3)), ): if isinstance(periodicity, list): periodicity = torch.tensor( periodicity, device=x_harmonic.device, dtype=torch.get_default_dtype(), ) n_theta = ( periodicity[None, None, :].repeat( x_periodic.shape[0], x_periodic.shape[1], 1 ) * x_periodic[:, :, None] ) cos_n_theta = n_theta.cos() k = k[:, None, :].repeat(1, x_periodic.shape[1], 1) sum_k_cos_n_theta = (k * cos_n_theta).sum(dim=-1) x_minus_eq = x_harmonic - eq energy = x_minus_eq * sum_k_cos_n_theta return energy def harmonic_harmonic_coupled( x0, x1, eq0, eq1, k, ): energy = k * (x0 - eq0) * (x1 - eq1) return energy def harmonic_harmonic_periodic_coupled( theta0, theta1, eq0, eq1, phi, k, ): energy = k * (theta0 - eq0) * (theta1 - eq1) * phi.cos() return energy ================================================ FILE: espaloma/mm/geometry.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def reduce_stack(msg, out): """Copy message and stack.""" def _reduce_stack(nodes, msg=msg, out=out): return {out: nodes.mailbox[msg]} return _reduce_stack def copy_src(src, out): """Copy source of an edge.""" def _copy_src(edges, src=src, out=out): return {out: edges.src[src].clone()} return _copy_src # ============================================================================= # SINGLE GEOMETRY ENTITY # ============================================================================= def distance(x0, x1): """Distance.""" return torch.norm(x0 - x1, p=2, dim=-1) def _angle(r0, r1): """Angle between vectors.""" angle = torch.atan2( torch.norm(torch.cross(r0, r1), p=2, dim=-1), torch.sum(torch.mul(r0, r1), dim=-1), ) return angle def angle(x0, x1, x2): """Angle between three points.""" left = x1 - x0 right = x1 - x2 return _angle(left, right) def _dihedral(r0, r1): """Dihedral between normal vectors.""" return _angle(r0, r1) def dihedral( x0: torch.Tensor, x1: torch.Tensor, x2: torch.Tensor, x3: torch.Tensor ) -> torch.Tensor: """Dihedral between four points.
Reference --------- Closely follows implementation in Yutong Zhao's timemachine: https://github.com/proteneer/timemachine/blob/1a0ab45e605dc1e28c44ea90f38cb0dedce5c4db/timemachine/potentials/bonded.py#L152-L199 """ # check input shapes assert x0.shape == x1.shape == x2.shape == x3.shape # compute displacements 0->1, 2->1, 2->3 r01 = x1 - x0 + torch.randn_like(x0) * 1e-5 r21 = x1 - x2 + torch.randn_like(x0) * 1e-5 r23 = x3 - x2 + torch.randn_like(x0) * 1e-5 # compute normal planes n1 = torch.cross(r01, r21) n2 = torch.cross(r21, r23) rkj_normed = r21 / torch.norm(r21, dim=-1, keepdim=True) y = torch.sum(torch.mul(torch.cross(n1, n2), rkj_normed), dim=-1) x = torch.sum(torch.mul(n1, n2), dim=-1) # choose quadrant correctly theta = torch.atan2(y, x) return theta # ============================================================================= # GEOMETRY IN HYPERNODES # ============================================================================= def apply_bond(nodes): """Bond length in nodes.""" return {"x": distance(x0=nodes.data["xyz0"], x1=nodes.data["xyz1"])} def apply_angle(nodes): """Angle values in nodes.""" return { "x": angle( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], ), "x_left": distance( x0=nodes.data["xyz1"], x1=nodes.data["xyz0"], ), "x_right": distance( x0=nodes.data["xyz1"], x1=nodes.data["xyz2"], ), "x_between": distance( x0=nodes.data["xyz0"], x1=nodes.data["xyz2"], ), } def apply_torsion(nodes): """Torsion dihedrals in nodes.""" return { "x": dihedral( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], x3=nodes.data["xyz3"], ), "x_bond_left": distance( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], ), "x_bond_center": distance( x0=nodes.data["xyz1"], x1=nodes.data["xyz2"], ), "x_bond_right": distance( x0=nodes.data["xyz2"], x1=nodes.data["xyz3"], ), "x_angle_left": angle( x0=nodes.data["xyz0"], x1=nodes.data["xyz1"], x2=nodes.data["xyz2"], ), "x_angle_right": angle( x0=nodes.data["xyz1"], x1=nodes.data["xyz2"], x2=nodes.data["xyz3"], ), } # ============================================================================= # GEOMETRY IN GRAPH # ============================================================================= # NOTE: # The following functions modify graphs in-place. def geometry_in_graph(g): """Assign values to geometric entities in graphs. Parameters ---------- g : `dgl.DGLHeteroGraph` Input graph. Returns ------- g : `dgl.DGLHeteroGraph` Output graph. Notes ----- This function modifies graphs in-place. """ import dgl # Copy coordinates to higher-order nodes. 
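# NOTE (added commentary): each edge type below ("n1_as_<pos>_in_<term>")
# connects an atom to the hypernodes that reference it at position <pos>;
# the copy-then-sum pattern simply transfers atom coordinates onto bonds
# ("n2"), angles ("n3"), torsions ("n4", "n4_improper"), and the
# nonbonded/onefour pairs, since each position receives exactly one message.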
g.multi_update_all( { **{ "n1_as_%s_in_n%s" % (pos_idx, big_idx): ( dgl.function.copy_u(u="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), ) for big_idx in range(2, 5) for pos_idx in range(big_idx) }, **{ "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), ) for term in ["nonbonded", "onefour"] for pos_idx in [0, 1] }, **{ "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="xyz", out="m_xyz%s" % pos_idx), dgl.function.sum( msg="m_xyz%s" % pos_idx, out="xyz%s" % pos_idx ), ) for term in ["n4_improper"] for pos_idx in [0, 1, 2, 3] }, }, cross_reducer="sum", ) # apply geometry functions g.apply_nodes(apply_bond, ntype="n2") g.apply_nodes(apply_angle, ntype="n3") if g.number_of_nodes("n4") > 0: g.apply_nodes(apply_torsion, ntype="n4") # copy coordinates to nonbonded if g.number_of_nodes("nonbonded") > 0: g.apply_nodes(apply_bond, ntype="nonbonded") if g.number_of_nodes("onefour") > 0: g.apply_nodes(apply_bond, ntype="onefour") if g.number_of_nodes("n4_improper") > 0: g.apply_nodes(apply_torsion, ntype="n4_improper") return g class GeometryInGraph(torch.nn.Module): def __init__(self, *args, **kwargs): super(GeometryInGraph, self).__init__() self.args = args self.kwargs = kwargs def forward(self, g): return geometry_in_graph(g, *self.args, **self.kwargs) ================================================ FILE: espaloma/mm/nonbonded.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # CONSTANTS # ============================================================================= import espaloma as esp from openmm import unit # CODATA 2018 # ref https://en.wikipedia.org/wiki/Coulomb_constant # Coulomb constant K_E = ( 8.9875517923 * 1e9 * unit.newton * unit.meter**2 * unit.coulomb ** (-2) * esp.units.PARTICLE ** (-1) ).value_in_unit(esp.units.COULOMB_CONSTANT_UNIT) # ============================================================================= # UTILITY FUNCTIONS FOR COMBINATION RULES FOR NONBONDED # ============================================================================= def geometric_mean(msg="m", out="epsilon"): def _geometric_mean(nodes): return {out: torch.prod(nodes.mailbox[msg], dim=1).pow(0.5)} return _geometric_mean def arithmetic_mean(msg="m", out="sigma"): def _arithmetic_mean(nodes): return {out: torch.sum(nodes.mailbox[msg], dim=1).mul(0.5)} return _arithmetic_mean # ============================================================================= # COMBINATION RULES FOR NONBONDED # ============================================================================= def lorentz_berthelot(g, suffix=""): import dgl g.multi_update_all( { "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u( u="epsilon%s" % suffix, out="m_epsilon" ), geometric_mean(msg="m_epsilon", out="epsilon%s" % suffix), ) for pos_idx in [0, 1] for term in ["nonbonded", "onefour"] }, cross_reducer="sum", ) g.multi_update_all( { "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="sigma%s" % suffix, out="m_sigma"), arithmetic_mean(msg="m_sigma", out="sigma%s" % suffix), ) for pos_idx in [0, 1] for term in ["nonbonded", "onefour"] }, cross_reducer="sum", ) return g def multiply_charges(g, 
suffix=""): """Multiply the charges of atoms into nonbonded and onefour terms. Parameters ---------- g : dgl.HeteroGraph Input graph. Returns ------- dgl.HeteroGraph : The modified graph with charges. """ import dgl g.multi_update_all( { "n1_as_%s_in_%s" % (pos_idx, term): ( dgl.function.copy_u(u="q%s" % suffix, out="m_q"), dgl.function.sum(msg="m_q", out="_q") # lambda node: {"q%s" % suffix: node.mailbox["m_q"].prod(dim=1)} ) for pos_idx in [0, 1] for term in ["nonbonded", "onefour"] }, cross_reducer="stack", apply_node_func=lambda node: {"q": node.data["_q"].prod(dim=1)}, ) return g # ============================================================================= # ENERGY FUNCTIONS # ============================================================================= def lj_12_6(x, sigma, epsilon): """Lennard-Jones 12-6. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` sigma : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` epsilon : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` """ return esp.mm.functional.lj(x=x, sigma=sigma, epsilon=epsilon) def lj_9_6(x, sigma, epsilon): """Lennard-Jones 9-6. Parameters ---------- x : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` sigma : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` epsilon : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` Returns ------- u : `torch.Tensor`, `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` """ return esp.mm.functional.lj( x=x, sigma=sigma, epsilon=epsilon, order=[9, 6], coefficients=[2, 3] ) def coulomb(x, q, k_e=K_E): """Columb interaction without cutoff. Parameters ---------- x : `torch.Tensor`, shape=`(batch_size, 1)` or `(batch_size, batch_size, 1)` Distance between atoms. q : `torch.Tensor`, `shape=(batch_size, 1) or `(batch_size, batch_size, 1)` Product of charge. Returns ------- torch.Tensor : `shape=(batch_size, 1)` or `(batch_size, batch_size, 1)` Coulomb energy. Notes ----- This computes half Coulomb energy to count for the duplication in onefour and nonbonded enumerations. 
""" return 0.5 * k_e * q / x ================================================ FILE: espaloma/mm/tests/system.xml ================================================ ================================================ FILE: espaloma/mm/tests/test_angle.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch def test_angle_random_vectors(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros( 3, ), scale=torch.ones( 3, ), ) left = distribution.sample() right = distribution.sample() cos_ref = (left * right).sum(dim=-1) / ( torch.norm(left) * torch.norm(right) ) cos_hat = torch.cos(esp.mm.geometry._angle(left, right)) npt.assert_almost_equal(cos_ref.numpy(), cos_hat.numpy(), decimal=3) def test_angle_random_points(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros(5, 3), scale=torch.ones(5, 3) ) x0 = distribution.sample() x1 = distribution.sample() x2 = distribution.sample() left = x1 - x0 right = x1 - x2 cos_ref = (left * right).sum(dim=-1) / ( torch.norm(left, dim=-1) * torch.norm(right, dim=-1) ) cos_hat = torch.cos(esp.angle(x0, x1, x2)) npt.assert_almost_equal(cos_ref.numpy(), cos_hat.numpy(), decimal=3) def test_zero(): import espaloma as esp x0 = torch.zeros(5, 3) npt.assert_almost_equal(esp.angle(x0, x0, x0).numpy(), 0.0) ================================================ FILE: espaloma/mm/tests/test_angle_energy.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch import openmm from openmm import unit from espaloma.utils.geometry import _sample_four_particle_torsion_scan omm_angle_unit = unit.radian omm_energy_unit = unit.kilojoule_per_mole from openmm import app import espaloma as esp def test_energy_angle_and_bond(): g = esp.Graph("C") # make simulation from espaloma.data.md import MoleculeVacuumSimulation # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=10, forcefield="gaff-1.81" ) simulation = esp_simulation.simulation_from_graph(g) system = simulation.system esp_simulation.run(g) forces = list(system.getForces()) energies = {} for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) # create new simulation _simulation = openmm.app.Simulation( simulation.topology, system, openmm.VerletIntegrator(0.0), ) _simulation.context.setPositions( g.nodes["n1"].data["xyz"][:, 0, :].detach().numpy() * unit.nanometer ) for idx, force in enumerate(forces): name = force.__class__.__name__ state = _simulation.context.getState( getEnergy=True, getParameters=True, groups=2**idx, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) energies[name] = energy for idx, force in enumerate(forces): name = force.__class__.__name__ if "HarmonicAngleForce" in name: print("openmm thinks there are %s angles" % force.getNumAngles()) for _idx in range(force.getNumAngles()): _, __, ___, eq, k = force.getAngleParameters(_idx) eq = eq.value_in_unit(esp.units.ANGLE_UNIT) k = k.value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT) print(eq, k) # parametrize ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = ff.parametrize(g) # n2 : bond, n3: angle, n1: nonbonded? 
# n1 : sigma (k), epsilon (eq), and charge (not included yet) for term in ["n2", "n3"]: g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] g.nodes[term].data["eq"] = g.nodes[term].data["eq_ref"] print( "espaloma thinks there are %s angles" % g.heterograph.number_of_nodes("n3") ) print(g.nodes["n3"].data["k"]) print(g.nodes["n3"].data["eq"]) # for each atom, store n_snapshots x 3 # g.nodes["n1"].data["xyz"] = torch.tensor( # simulation.context.getState(getPositions=True) # .getPositions(asNumpy=True) # .value_in_unit(esp.units.DISTANCE_UNIT), # dtype=torch.float32, # )[None, :, :].permute(1, 0, 2) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3", "n4"]) n_decimals = 3 # test angles npt.assert_almost_equal( g.nodes["g"].data["u_n3"].detach().numpy(), energies["HarmonicAngleForce"], decimal=n_decimals, ) if __name__ == "__main__": test_energy_angle_and_bond() ================================================ FILE: espaloma/mm/tests/test_bond_energy.py ================================================ import pytest def test_multiple_conformation(): import espaloma as esp g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize g = param(g) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, suffix="_ref") ================================================ FILE: espaloma/mm/tests/test_charge_energy_consistency.py ================================================ import pytest import espaloma as esp import numpy as np import numpy.testing as npt import pytest import torch @pytest.mark.parametrize( "g", esp.data.esol(first=10), # use a subset of ESOL dataset to test # [esp.Graph("c1ccccc1")], ) def test_coulomb_energy_consistency(g): """We use both `esp.mm` and OpenMM to compute the Coulomb energy of some molecules with generated geometries and see if the resulting Columb energy matches. """ from openff.units import unit as openff_unit from espaloma.data.md import MoleculeVacuumSimulation print(g.mol) # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=10, n_steps_per_sample=10, forcefield="gaff-1.81", charge_method="gasteiger", ) simulation = esp_simulation.simulation_from_graph(g) charges = g.mol.partial_charges.m_as(openff_unit.elementary_charge).flatten() system = simulation.system esp_simulation.run(g, in_place=True) # if MD blows up, forget about it if g.nodes["n1"].data["xyz"].abs().max() > 100: pytest.skip( "MD simulation blew up, skipping test. 
" ) g.nodes["n1"].data["q"] = torch.tensor(charges).unsqueeze(-1) esp.mm.nonbonded.multiply_charges(g.heterograph) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph( g.heterograph, terms=["nonbonded", "onefour"] ) print(g.nodes["g"].data["u"].detach()) print(esp.data.md.get_coulomb_force(g)[0]) npt.assert_almost_equal( g.nodes["g"].data["u"].detach().numpy(), esp.data.md.get_coulomb_force(g)[0].numpy(), decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_charge_energy_consistency_hardcode.py ================================================ import pytest import espaloma as esp import numpy as np import numpy.testing as npt import pytest import torch import openmm from openmm import unit @pytest.mark.parametrize( "g", esp.data.esol(first=1), # use a subset of ESOL dataset to test ) def test_coulomb_energy_consistency(g): """We use both `esp.mm` and OpenMM to compute the Coulomb energy of some molecules with generated geometries and see if the resulting Columb energy matches. """ # make simulation from espaloma.data.md import MoleculeVacuumSimulation # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=10, forcefield="gaff-1.81", charge_method="gasteiger", ) simulation = esp_simulation.simulation_from_graph(g) charges = g.mol.partial_charges.flatten() system = simulation.system esp_simulation.run(g, in_place=True) # if MD blows up, forget about it if g.nodes["n1"].data["xyz"].abs().max() > 100: pytest.skip( "MD simulation blew up, skipping test. " ) _simulation = openmm.app.Simulation( simulation.topology, system, openmm.VerletIntegrator(0.0), ) forces = list(system.getForces()) for force in forces: name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) force.updateParametersInContext(_simulation.context) _simulation.context.setPositions( g.nodes["n1"].data["xyz"][:, 0, :].detach().numpy() * unit.bohr ) state = _simulation.context.getState( getEnergy=True, getParameters=True, ) energy_old = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) forces = list(system.getForces()) print(forces) for force in forces: name = force.__class__.__name__ print(name) if name == "NonbondedForce": force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) print(force.getNumExceptions()) for idx in range(force.getNumParticles()): q, sigma, epsilon = force.getParticleParameters(idx) force.setParticleParameters(idx, 0.0, sigma, epsilon) for idx in range(force.getNumExceptions()): idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( idx ) force.setExceptionParameters( idx, idx0, idx1, 0.0, sigma, epsilon ) force.updateParametersInContext(_simulation.context) state = _simulation.context.getState( getEnergy=True, getParameters=True, ) energy_new = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) g.nodes["n1"].data["q"] = torch.tensor(charges).unsqueeze(-1) esp.mm.nonbonded.multiply_charges(g.heterograph) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph( g.heterograph, terms=["nonbonded", "onefour"] ) npt.assert_almost_equal( g.nodes["g"].data["u"].item(), energy_old - energy_new, decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_dihedral.py ================================================ import numpy.testing as npt import torch import espaloma as esp from espaloma.utils.geometry import ( _sample_four_particle_torsion_scan, 
_timemachine_signed_torsion_angle, ) def test_dihedral_vectors(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros(5, 3), scale=torch.ones(5, 3) ) left = distribution.sample() right = distribution.sample() npt.assert_almost_equal( esp.mm.geometry._angle(left, right).numpy(), esp.mm.geometry._dihedral(left, right).numpy(), decimal=3, ) def test_dihedral_points(): n_samples = 1000 # get geometries xyz_np = _sample_four_particle_torsion_scan(n_samples) # compute dihedrals using timemachine (numpy / JAX) ci, cj, ck, cl = ( xyz_np[:, 0, :], xyz_np[:, 1, :], xyz_np[:, 2, :], xyz_np[:, 3, :], ) theta_timemachine = _timemachine_signed_torsion_angle(ci, cj, ck, cl) # compute dihedrals using espaloma (PyTorch) xyz = torch.tensor(xyz_np) x0, x1, x2, x3 = xyz[:, 0, :], xyz[:, 1, :], xyz[:, 2, :], xyz[:, 3, :] theta_espaloma = esp.dihedral(x0, x1, x2, x3).numpy() npt.assert_almost_equal( theta_timemachine, theta_espaloma, decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_distance.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch def test_distance(): import espaloma as esp distribution = torch.distributions.normal.Normal( loc=torch.zeros(5, 3), scale=torch.ones(5, 3) ) x0 = distribution.sample() x1 = distribution.sample() npt.assert_almost_equal( esp.distance(x0, x1).numpy(), torch.sqrt((x0 - x1).pow(2).sum(dim=-1)).numpy(), decimal=3, ) npt.assert_almost_equal(esp.distance(x0, x0).numpy(), 0.0) ================================================ FILE: espaloma/mm/tests/test_energy.py ================================================ import pytest import torch import espaloma as esp def test_import(): esp.mm.energy def test_energy(): g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize g = param(g) # parametrize layer = esp.nn.dgl_legacy.gn() net = torch.nn.Sequential( esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]), esp.nn.readout.janossy.JanossyPooling( in_features=32, config=[32, "tanh"], out_features={ 1: ["epsilon", "sigma"], 2: ["k", "eq"], 3: ["k", "eq"], 4: ["k"], }, ), esp.nn.readout.janossy.JanossyPoolingImproper( in_features=32, config=[32, "tanh"], out_features={ "k": 6, }, ), ) g = net(g.heterograph) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g) # esp.mm.energy.energy_in_graph(g) esp.mm.energy.energy_in_graph(g, terms=["n2", "n3", "n4", "n4_improper"]) # def test_energy_consistent(): # g = esp.Graph("c1ccccc1") # # # make simulation # from espaloma.data.md import MoleculeVacuumSimulation # # simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) # g = simulation.run(g, in_place=True) # # param = esp.graphs.legacy_force_field.LegacyForceField( # "smirnoff99Frosst-1.1.0" # ).parametrize # # g = param(g) # # for node in ["n1", "n2", "n3"]: # _dict = {} # for data in g.nodes[node].data.keys(): # if data.endswith("_ref"): # _dict[data.replace("_ref", "")] = g.nodes[node].data[data] # for key, value in _dict.items(): # g.nodes[node].data[key] = value # # # print(g.nodes['n2'].data) # esp.mm.geometry.geometry_in_graph(g.heterograph) # esp.mm.energy.energy_in_graph(g.heterograph) # # esp.mm.energy.energy_in_graph(g.heterograph, suffix="_ref") 
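# -----------------------------------------------------------------------------
# (Added sketch, not part of the original test file.) How the pieces exercised
# above typically compose into a fitting loop: predict parameters with the
# neural network, compute "u" with the energy modules, and regress against the
# reference "u_ref" from a legacy force field using a GraphMetric loss
# (GraphMetric as defined in this repository's metrics module; exact import
# path assumed to be esp.metrics). Hyperparameters are arbitrary placeholders.
# -----------------------------------------------------------------------------
import torch

import espaloma as esp
from espaloma.data.md import MoleculeVacuumSimulation


def fit_to_legacy_energies(n_steps=10):
    # molecule, snapshots, and reference ("_ref") parameters
    g = esp.Graph("c1ccccc1")
    MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10).run(
        g, in_place=True
    )
    esp.graphs.legacy_force_field.LegacyForceField(
        "smirnoff99Frosst-1.1.0"
    ).parametrize(g)

    # representation -> parameter readout -> geometry -> energies
    net = torch.nn.Sequential(
        esp.nn.Sequential(
            esp.nn.dgl_legacy.gn(), [32, "tanh", 32, "tanh", 32, "tanh"]
        ),
        esp.nn.readout.janossy.JanossyPooling(
            in_features=32,
            config=[32, "tanh"],
            out_features={2: ["k", "eq"], 3: ["k", "eq"]},
        ),
        esp.mm.geometry.GeometryInGraph(),
        esp.mm.energy.EnergyInGraph(terms=["n2", "n3"]),
        esp.mm.energy.EnergyInGraph(terms=["n2", "n3"], suffix="_ref"),
    )

    # compare predicted "u" with reference "u_ref" on the graph level
    loss_fn = esp.metrics.GraphMetric(
        base_metric=torch.nn.MSELoss(), between=["u", "u_ref"], level="g"
    )
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    for _ in range(n_steps):
        optimizer.zero_grad()
        loss = loss_fn(net(g.heterograph))
        loss.backward()
        optimizer.step()
    return g, net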
================================================ FILE: espaloma/mm/tests/test_energy_gaussian.py ================================================ import pytest """ def test_energy(): import espaloma as esp import torch g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ).parametrize g = param(g) # parametrize layer = esp.nn.dgl_legacy.gn() net = torch.nn.Sequential( esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]), esp.nn.readout.janossy.JanossyPooling( in_features=32, config=[32, "tanh"], out_features={ 1: {'sigma': 1, 'epsilon': 1}, 2: {'coefficients': 200}, 3: {'k':1, 'eq': 1}, }, ), ) g = net(g.heterograph) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g) esp.mm.energy.energy_in_graph(g) esp.mm.energy.energy_in_graph(g, suffix="_ref") """ ================================================ FILE: espaloma/mm/tests/test_energy_ii.py ================================================ import pytest import espaloma as esp import torch import dgl def test_energy(): g = esp.Graph("c1ccccc1") # make simulation from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) param = esp.graphs.legacy_force_field.LegacyForceField( "gaff-1.81" ).parametrize g = param(g) # parametrize # layer layer = esp.nn.layers.dgl_legacy.gn() # representation representation = esp.nn.Sequential( layer, config=[32, "relu", 32, "relu", 32, "relu"] ) # get the last bit of units units = 32 janossy_config = [32, "relu"] readout = esp.nn.readout.janossy.JanossyPooling( in_features=units, config=janossy_config, out_features={ 2: {"log_coefficients": 2}, 3: { "log_coefficients": 2, "coefficients_urey_bradley": 2, "k_bond_bond": 1, "k_bond_angle": 1, "k_bond_angle": 1, }, 4: { "k": 6, "k_angle_angle": 1, "k_angle_angle_torsion": 1, "k_angle_torsion": 1, "k_side_torsion": 1, "k_center_torsion": 1, }, }, ) readout_improper = esp.nn.readout.janossy.JanossyPoolingImproper( in_features=units, config=janossy_config ) class ExpCoeff(torch.nn.Module): def forward(self, g): g.nodes["n2"].data["coefficients"] = ( g.nodes["n2"].data["log_coefficients"].exp() ) g.nodes["n3"].data["coefficients"] = ( g.nodes["n3"].data["log_coefficients"].exp() ) return g class CarryII(torch.nn.Module): def forward(self, g): import math g.multi_update_all( { "n2_as_0_in_n3": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_left"), ), "n2_as_1_in_n3": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_right"), ), "n2_as_0_in_n4": ( dgl.function.copy_u("u", "m_u_0"), dgl.function.sum("m_u_0", "u_bond_left"), ), "n2_as_1_in_n4": ( dgl.function.copy_u("u", "m_u_1"), dgl.function.sum("m_u_1", "u_bond_center"), ), "n2_as_2_in_n4": ( dgl.function.copy_u("u", "m_u_2"), dgl.function.sum("m_u_2", "u_bond_right"), ), "n3_as_0_in_n4": ( dgl.function.copy_u("u", "m3_u_0"), dgl.function.sum("m3_u_0", "u_angle_left"), ), "n3_as_1_in_n4": ( dgl.function.copy_u("u", "m3_u_1"), dgl.function.sum("m3_u_1", "u_angle_right"), ), }, cross_reducer="sum", ) return g net = torch.nn.Sequential( representation, readout, readout_improper, ExpCoeff(), esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(terms=["n2", "n3", "n4", "n4_improper"]), CarryII(), 
esp.mm.energy.EnergyInGraphII(), ) torch.nn.init.normal_( net[1].f_out_2_to_log_coefficients.bias, mean=-5, ) torch.nn.init.normal_( net[1].f_out_3_to_log_coefficients.bias, mean=-5, ) for name, module in net[1].named_modules(): if "k" in name: torch.nn.init.normal(module.bias, mean=0.0, std=1e-4) torch.nn.init.normal(module.weight, mean=0.0, std=1e-4) g = net(g.heterograph) print(g.nodes["n3"].data) print(g.nodes["n4"].data) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g) esp.mm.energy.energy_in_graph(g) ================================================ FILE: espaloma/mm/tests/test_geometry.py ================================================ import pytest import torch import espaloma as esp from espaloma.graphs.utils.regenerate_impropers import regenerate_impropers def test_import(): esp.mm.geometry # later, if we want to do multiple molecules, group these into a struct smiles = "c1ccccc1" n_samples = 2 ## Different number of expected terms for different improper permutations expected_n_terms = { "none": dict(n2=24, n3=36, n4=48, n4_improper=36), "espaloma": dict(n2=24, n3=36, n4=48, n4_improper=36), "smirnoff": dict(n2=24, n3=36, n4=48, n4_improper=18), } @pytest.fixture def all_g(): from espaloma.data.md import MoleculeVacuumSimulation all_g = {} for improper_def in expected_n_terms.keys(): g = esp.Graph(smiles) if improper_def != "none": regenerate_impropers(g, improper_def) simulation = MoleculeVacuumSimulation( n_samples=n_samples, n_steps_per_sample=1 ) g = simulation.run(g, in_place=True) all_g[improper_def] = g return all_g def test_geometry_can_be_computed_without_exceptions(all_g): for g in all_g.values(): g = esp.mm.geometry.geometry_in_graph(g.heterograph) def test_geometry_n_terms(all_g): for improper_def, g in all_g.items(): g = esp.mm.geometry.geometry_in_graph(g.heterograph) for term, n_terms in expected_n_terms[improper_def].items(): assert g.nodes[term].data["x"].shape == torch.Size( [n_terms, n_samples] ) ================================================ FILE: espaloma/mm/tests/test_linear_combination.py ================================================ import pytest def test_linear_combination(): import torch import espaloma as esp assert ( esp.mm.functional.linear_mixture( 0.0, torch.tensor([[0.0, 0.0]]), ) == 0.0 ) assert ( esp.mm.functional.linear_mixture( 1.0, torch.tensor([[1.0, 1.0]]), [0.0, 2.0], ) == 1.0 ) def test_consistency(): import torch import espaloma as esp g = esp.Graph("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") from espaloma.data.md import MoleculeVacuumSimulation simulation = MoleculeVacuumSimulation(n_samples=10, n_steps_per_sample=10) g = simulation.run(g, in_place=True) g.nodes["n2"].data["coefficients"] = torch.randn( g.heterograph.number_of_nodes("n2"), 2 ).exp() g.nodes["n3"].data["coefficients"] = torch.randn( g.heterograph.number_of_nodes("n3"), 2 ).exp() esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3"]) u0_2 = g.nodes["n2"].data["u"] - g.nodes["n2"].data["u"].mean( dim=1, keepdims=True ) u0_3 = g.nodes["n3"].data["u"] - g.nodes["n3"].data["u"].mean( dim=1, keepdims=True ) u0 = g.nodes["g"].data["u"] - g.nodes["g"].data["u"].mean( dim=1, keepdims=True ) ( g.nodes["n2"].data["k"], g.nodes["n2"].data["eq"], ) = esp.mm.functional.linear_mixture_to_original( g.nodes["n2"].data["coefficients"][:, 0][:, None], g.nodes["n2"].data["coefficients"][:, 1][:, None], 1.5, 6.0, ) import math ( g.nodes["n3"].data["k"], g.nodes["n3"].data["eq"], ) = 
esp.mm.functional.linear_mixture_to_original( g.nodes["n3"].data["coefficients"][:, 0][:, None], g.nodes["n3"].data["coefficients"][:, 1][:, None], 0.0, math.pi, ) g.nodes["n2"].data.pop("coefficients") g.nodes["n3"].data.pop("coefficients") esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3"]) u1_2 = g.nodes["n2"].data["u"] - g.nodes["n2"].data["u"].mean( dim=1, keepdims=True ) u1_3 = g.nodes["n3"].data["u"] - g.nodes["n3"].data["u"].mean( dim=1, keepdims=True ) u1 = g.nodes["g"].data["u"] - g.nodes["g"].data["u"].mean( dim=1, keepdims=True ) import numpy.testing as npt npt.assert_almost_equal( u0_2.detach().numpy(), u1_2.detach().numpy(), decimal=3, ) npt.assert_almost_equal( u0_3.detach().numpy(), u1_3.detach().numpy(), decimal=3, ) npt.assert_almost_equal( u0.detach().numpy(), u1.detach().numpy(), decimal=3, ) ================================================ FILE: espaloma/mm/tests/test_openmm_consistency.py ================================================ import numpy as np import numpy.testing as npt import pytest import torch import openmm from openmm import unit from espaloma.utils.geometry import _sample_four_particle_torsion_scan omm_angle_unit = unit.radian omm_energy_unit = unit.kilojoule_per_mole from openmm import app import espaloma as esp decimal_threshold = 2 def _create_torsion_sim( periodicity: int = 2, phase=0 * omm_angle_unit, k=10.0 * omm_energy_unit ) -> app.Simulation: """Create a 4-particle OpenMM Simulation containing only a PeriodicTorsionForce""" system = openmm.System() # add 4 particles of unit mass for _ in range(4): system.addParticle(1) # add torsion force to system force = openmm.PeriodicTorsionForce() force.addTorsion(0, 1, 2, 3, periodicity, phase, k) system.addForce(force) # create openmm Simulation, which requires a Topology and Integrator topology = app.Topology() chain = topology.addChain() residue = topology.addResidue("torsion", chain) for name in ["a", "b", "c", "d"]: topology.addAtom(name, "C", residue) integrator = openmm.VerletIntegrator(1.0) sim = app.Simulation(topology, system, integrator) return sim # TODO: mark this properly: want to test periodicities 1..6, +ve, -ve k # @pytest.mark.parametrize(periodicity=[1,2,3,4,5,6], k=[-10 * omm_energy_unit, +10 * omm_energy_unit]) def test_periodic_torsion( periodicity=4, k=10 * omm_energy_unit, n_samples=100 ): """Using simulated torsion scan, test if espaloma torsion energies and OpenMM torsion energies agree. 
""" phase = 0 * omm_angle_unit # all zero phases # create torsion simulation sim = _create_torsion_sim(periodicity=periodicity, phase=phase, k=k) # grab snapshots from torsion scan xyz_np = _sample_four_particle_torsion_scan(n_samples) # compute energies using OpenMM openmm_energies = np.zeros(n_samples) for i, pos in enumerate(xyz_np): sim.context.setPositions(pos) openmm_energies[i] = ( sim.context.getState(getEnergy=True).getPotentialEnergy() / omm_energy_unit ) # compute energies using espaloma xyz = torch.tensor(xyz_np) x0, x1, x2, x3 = xyz[:, 0, :], xyz[:, 1, :], xyz[:, 2, :], xyz[:, 3, :] theta = esp.mm.geometry.dihedral(x0, x1, x2, x3).reshape((n_samples, 1)) ks = torch.zeros(n_samples, 6) ks[:, periodicity - 1] = k.value_in_unit(esp.units.ENERGY_UNIT) espaloma_energies = ( esp.mm.functional.periodic(theta, ks).numpy().flatten() * esp.units.ENERGY_UNIT ) espaloma_energies_in_omm_units = espaloma_energies.value_in_unit( omm_energy_unit ) np.testing.assert_almost_equal( actual=espaloma_energies_in_omm_units, desired=openmm_energies, decimal=decimal_threshold, ) # TODO: parameterize on the individual energy terms also @pytest.mark.parametrize( "g", esp.data.esol(first=10), ) def test_energy_angle_and_bond(g): # make simulation from espaloma.data.md import MoleculeVacuumSimulation # get simulation esp_simulation = MoleculeVacuumSimulation( n_samples=1, n_steps_per_sample=1000, forcefield="gaff-1.81", charge_method="gasteiger", ) simulation = esp_simulation.simulation_from_graph(g) system = simulation.system esp_simulation.run(g, in_place=True) # if MD blows up, forget about it if g.nodes["n1"].data["xyz"].abs().max() > 100: pytest.skip("MD simulation blew up, skipping test.") forces = list(system.getForces()) energies = {} for idx, force in enumerate(forces): force.setForceGroup(idx) name = force.__class__.__name__ if "Nonbonded" in name: force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff) # epsilons = {} # sigmas = {} # for _idx in range(force.getNumParticles()): # q, sigma, epsilon = force.getParticleParameters(_idx) # # record parameters # epsilons[_idx] = epsilon # sigmas[_idx] = sigma # force.setParticleParameters(_idx, 0., sigma, epsilon) # def sigma_combining_rule(sig1, sig2): # return (sig1 + sig2) / 2 # def eps_combining_rule(eps1, eps2): # return np.sqrt(np.abs(eps1 * eps2)) # for _idx in range(force.getNumExceptions()): # idx0, idx1, q, sigma, epsilon = force.getExceptionParameters( # _idx) # force.setExceptionParameters( # _idx, # idx0, # idx1, # 0.0, # sigma_combining_rule(sigmas[idx0], sigmas[idx1]), # eps_combining_rule(epsilons[idx0], epsilons[idx1]) # ) # force.updateParametersInContext(_simulation.context) # create new simulation _simulation = openmm.app.Simulation( simulation.topology, system, openmm.VerletIntegrator(0.0), ) _simulation.context.setPositions( g.nodes["n1"].data["xyz"][:, 0, :].detach().numpy() * unit.bohr ) for idx, force in enumerate(forces): name = force.__class__.__name__ state = _simulation.context.getState( getEnergy=True, getParameters=True, groups=2**idx, ) energy = state.getPotentialEnergy().value_in_unit( esp.units.ENERGY_UNIT ) energies[name] = energy # parametrize ff = esp.graphs.legacy_force_field.LegacyForceField("gaff-1.81") g = ff.parametrize(g) # n2 : bond, n3: angle, n1: nonbonded? 
# n1 : sigma (k), epsilon (eq), and charge (not included yet) for term in ["n2", "n3"]: g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] g.nodes[term].data["eq"] = g.nodes[term].data["eq_ref"] """ for term in ["n1"]: g.nodes[term].data["sigma"] = g.nodes[term].data["sigma_ref"] g.nodes[term].data["epsilon"] = g.nodes[term].data["epsilon_ref"] # g.nodes[term].data['q'] = g.nodes[term].data['q_ref'] """ for term in ["n4"]: g.nodes[term].data["phases"] = g.nodes[term].data["phases_ref"] g.nodes[term].data["periodicity"] = g.nodes[term].data[ "periodicity_ref" ] g.nodes[term].data["k"] = g.nodes[term].data["k_ref"] # for each atom, store n_snapshots x 3 # g.nodes["n1"].data["xyz"] = torch.tensor( # simulation.context.getState(getPositions=True) # .getPositions(asNumpy=True) # .value_in_unit(esp.units.DISTANCE_UNIT), # dtype=torch.float32, # )[None, :, :].permute(1, 0, 2) # print(g.nodes['n2'].data) esp.mm.geometry.geometry_in_graph(g.heterograph) esp.mm.energy.energy_in_graph(g.heterograph, terms=["n2", "n3", "n4"]) # writes into nodes # .data['u_nonbonded'], .data['u_onefour'], .data['u2'], .data['u3'], # TODO: consider more carefully how many decimals of precision are needed n_decimals = 3 # test bonds npt.assert_almost_equal( g.nodes["g"].data["u_n2"].detach().numpy(), energies["HarmonicBondForce"], decimal=n_decimals, ) # test angles npt.assert_almost_equal( g.nodes["g"].data["u_n3"].detach().numpy(), energies["HarmonicAngleForce"], decimal=n_decimals, ) # propers = g.nodes["g"].data["u_n4"].detach().numpy() # impropers = g.nodes["g"].data["u_n4_improper"].detach().numpy() # all_torsions = propers + impropers # npt.assert_almost_equal( # all_torsions, # energies["PeriodicTorsionForce"], # decimal=n_decimals, # ) # print(all_torsions) # print(energies["PeriodicTorsionForce"]) # TODO: # This is not working now, matching OpenMM nonbonded. 
# test nonbonded # TODO: must set all charges to zero in _simulation for this to pass currently, since g doesn't have any charges # npt.assert_almost_equal( # g.nodes['g'].data['u_nonbonded'].numpy()\ # + g.nodes['g'].data['u_onefour'].numpy(), # energies['NonbondedForce'], # decimal=3, # ) ================================================ FILE: espaloma/mm/tests/test_recoverability.py ================================================ # Check whether we can recover a molecular mechanics model containing just one kind of term # Initially, interested in recovering a molecular mechanics model containing only improper torsion terms import numpy as np from openff.toolkit.topology import Molecule, Topology from openff.toolkit.typing.engines.smirnoff import ForceField import openmm as mm import pytest import espaloma as esp import torch def _create_impropers_only_system( smiles: str = "CC1=C(C(=O)C2=C(C1=O)N3CC4C(C3(C2COC(=O)N)OC)N4)N", ) -> mm.System: """Create a simulation that contains only improper torsion terms, by parameterizing with openff-1.2.0 and deleting all terms but impropers """ molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) g = esp.Graph(molecule) topology = Topology.from_molecules(molecule) forcefield = ForceField("openff-1.2.0.offxml") openmm_system = forcefield.create_openmm_system(topology) # delete all forces except PeriodicTorsionForce is_torsion = ( lambda force: "PeriodicTorsionForce" in force.__class__.__name__ ) for i in range(openmm_system.getNumForces())[::-1]: if not is_torsion(openmm_system.getForce(i)): openmm_system.removeForce(i) assert openmm_system.getNumForces() == 1 torsion_force = openmm_system.getForce(0) assert is_torsion(torsion_force) # set k = 0 for any torsion that's not an improper indices = set( map( tuple, esp.graphs.utils.offmol_indices.improper_torsion_indices( molecule ), ) ) num_impropers_retained = 0 for i in range(torsion_force.getNumTorsions()): ( p1, p2, p3, p4, periodicity, phase, k, ) = torsion_force.getTorsionParameters(i) if (p1, p2, p3, p4) in indices: num_impropers_retained += 1 else: torsion_force.setTorsionParameters( i, p1, p2, p3, p4, periodicity, phase, 0.0 ) assert ( num_impropers_retained > 0 ) # otherwise this molecule is not a useful test case! 
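# at this point every proper torsion has k = 0, so the single remaining PeriodicTorsionForce contributes improper-torsion energy only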
return openmm_system, topology, g @pytest.mark.skip(reason="too slow") def test_improper_recover(): import openmm from openmm import unit from openmm.app import Simulation from openmm.unit import Quantity TEMPERATURE = 500 * unit.kelvin STEP_SIZE = 1 * unit.femtosecond COLLISION_RATE = 1 / unit.picosecond system, topology, g = _create_impropers_only_system() # use langevin integrator, although it's not super useful here integrator = openmm.LangevinIntegrator( TEMPERATURE, COLLISION_RATE, STEP_SIZE ) # initialize simulation simulation = Simulation( topology=topology, system=system, integrator=integrator ) import openff.toolkit # get conformer g.mol.generate_conformers( toolkit_registry=openff.toolkit.utils.RDKitToolkitWrapper(), ) # put conformer in simulation simulation.context.setPositions(g.mol.conformers[0]) # minimize energy simulation.minimizeEnergy() # set velocities simulation.context.setVelocitiesToTemperature(TEMPERATURE) samples = [] us = [] # loop through number of samples for _ in range(10): # run MD for `self.n_steps_per_sample` steps simulation.step(10) # append samples to `samples` samples.append( simulation.context.getState(getPositions=True) .getPositions(asNumpy=True) .value_in_unit(esp.units.DISTANCE_UNIT) ) us.append( simulation.context.getState(getEnergy=True) .getPotentialEnergy() .value_in_unit(esp.units.ENERGY_UNIT) ) # put samples into an array samples = np.array(samples) us = np.array(us) # put samples into tensor samples = torch.tensor(samples, dtype=torch.float32) us = torch.tensor(us, dtype=torch.float32)[None, :, None] g.heterograph.nodes["n1"].data["xyz"] = samples.permute(1, 0, 2) # require gradient for force matching g.heterograph.nodes["n1"].data["xyz"].requires_grad = True g.heterograph.nodes["g"].data["u_ref"] = us # parametrize layer = esp.nn.dgl_legacy.gn() net = torch.nn.Sequential( esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]), esp.nn.readout.janossy.JanossyPoolingImproper( in_features=32, config=[32, "tanh"], out_features={ "k": 6, }, ), esp.mm.geometry.GeometryInGraph(), esp.mm.energy.EnergyInGraph(terms=["n4_improper"]), ) optimizer = torch.optim.Adam(net.parameters(), 1e-3) for _ in range(1500): optimizer.zero_grad() net(g.heterograph) u_ref = g.nodes["g"].data["u"] u = g.nodes["g"].data["u_ref"] loss = torch.nn.MSELoss()(u_ref, u) loss.backward() print(loss) optimizer.step() assert loss.detach().numpy().item() < 0.1 # caffeine_smiles = 'CN1C=NC2=C1C(=O)N(C(=O)N2C)C' # # # def _create_random_impropers_only_system(smiles: str = caffeine_smiles, k_stddev: float = 10.0) -> mm.System: # """Create an OpenMM system that contains only a large number of improper torsion terms, # assigning random coefficients ~ N(0, k_stddev) kJ/mol""" # # molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True) # # topology = Topology.from_molecules(molecule) # forcefield = ForceField('openff-1.2.0.offxml') # openmm_system = forcefield.create_openmm_system(topology) # # # delete all forces # while openmm_system.getNumForces() > 0: # openmm_system.removeForce(0) # # # add a torsion force # torsion_force = mm.PeriodicTorsionForce() # # # for each improper torsion abcd, sample a periodicity, phase, and k, then add 3 terms to torsion_force # # with different indices abcd, acdb, adbc but identical periodicity, phase, and k # indices = esp.graphs.utils.offmol_indices.improper_torsion_indices(molecule) # improper_perms = [(0, 1, 2, 3), (0, 2, 3, 1), (0, 3, 1, 2)] # # for inds in indices: # periodicity = np.random.randint(1, 7) # phase = 0 # k = 
np.random.randn() * k_stddev # for perm in improper_perms: # p1, p2, p3, p4 = [int(inds[p]) for p in perm] # careful to pass python ints rather than np ints to openmm # torsion_force.addTorsion(p1, p2, p3, p4, periodicity, phase, k) # # openmm_system.addForce(torsion_force) # # return openmm_system # TODO: integration test where we recover this molecular mechanics system from energies/forces ================================================ FILE: espaloma/mm/torsion.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import espaloma as esp # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def periodic_torsion( x, k, periodicity=list(range(1, 7)), phases=[0.0 for _ in range(6)] ): """Periodic torsion potential Parameters ---------- x : `torch.Tensor`, `shape = (batch_size, 1)` Dihedral value. k : `torch.Tensor`, `shape = (batch_size, n_phases)` Force constants. periodicity : `torch.Tensor`, `shape = (batch_size, n_phases)` Periodicities phases : `torch.Tensor`, `shape = (batch_size, n_phases)` Phase offsets Returns ------- u : `torch.Tensor`, `shape = (batch_size, 1)` Energy. """ # NOTE: # 0.5 because all torsions are calculated twice out = 0.5 * esp.mm.functional.periodic( x=x, k=k, periodicity=periodicity, phases=phases, ) # assert(out.shape == (len(x), 1)) return out def angle_angle( u_angle_left, u_angle_right, k_angle_angle, ): u_angle_left = u_angle_left - u_angle_left.min(dim=-1, keepdims=True)[0] u_angle_right = ( u_angle_right - u_angle_right.min(dim=-1, keepdims=True)[0] ) return k_angle_angle * (u_angle_left**0.5) * (u_angle_right**0.5) def angle_torsion( u_angle_left, u_angle_right, u_torsion, k_angle_torsion, ): u_angle_left = u_angle_left - u_angle_left.min(dim=-1, keepdims=True)[0] u_angle_right = ( u_angle_right - u_angle_right.min(dim=-1, keepdims=True)[0] ) return ( k_angle_torsion * (u_angle_left**0.5) * u_torsion + k_angle_torsion * (u_angle_right**0.5) * u_torsion ) def angle_angle_torsion( u_angle_left, u_angle_right, u_torsion, k_angle_angle_torsion, ): u_angle_left = u_angle_left - u_angle_left.min(dim=-1, keepdims=True)[0] u_angle_right = ( u_angle_right - u_angle_right.min(dim=-1, keepdims=True)[0] ) return ( k_angle_angle_torsion * (u_angle_left**0.5) * (u_angle_right**0.5) * u_torsion ) def bond_torsion( u_bond_left, u_bond_right, u_bond_center, u_torsion, k_side_torsion, k_center_torsion, ): u_bond_left = u_bond_left - u_bond_left.min(dim=-1, keepdims=True)[0] u_bond_right = u_bond_right - u_bond_right.min(dim=-1, keepdims=True)[0] u_bond_center = ( u_bond_center - u_bond_center.min(dim=-1, keepdims=True)[0] ) return ( k_side_torsion * u_torsion * (u_bond_left**0.5) + k_side_torsion * u_torsion * (u_bond_right**0.5) + k_center_torsion * u_torsion * (u_bond_center**0.5) ) ================================================ FILE: espaloma/nn/__init__.py ================================================ from . 
import baselines, layers, readout, sequential from .layers import dgl_legacy from .sequential import Sequential ================================================ FILE: espaloma/nn/baselines.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # MODULE CLASSES # ============================================================================= class FreeParameterBaseline(torch.nn.Module): """Parametrize a graph by populating the parameters with free `torch.nn.Parameter`. """ def __init__(self, g_ref): super(FreeParameterBaseline, self).__init__() self.g_ref = g_ref # whenever there is a reference parameter, # assign a `torch.nn.Parameter` for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref") and "u" not in param: setattr( self, "%s_%s" % (term, param.replace("_ref", "")), torch.nn.Parameter( torch.zeros_like( param_value.clone().detach(), ) ), ) def forward(self, g): update_dicts = {node: {} for node in self.g_ref.ntypes} for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref"): if hasattr( self, "%s_%s" % (term, param.replace("_ref", "")) ): update_dicts[term][ param.replace("_ref", "") ] = getattr( self, "%s_%s" % (term, param.replace("_ref", "")), ) for node, update_dict in update_dicts.items(): for param, param_value in update_dict.items(): g.nodes[node].data[param] = param_value return g class FreeParameterBaselineInitMean(torch.nn.Module): """Parametrize a graph by populating the parameters with free `torch.nn.Parameter`. """ def __init__(self, g_ref): super(FreeParameterBaselineInitMean, self).__init__() self.g_ref = g_ref # whenever there is a reference parameter, # assign a `torch.nn.Parameter` for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref") and "u" not in param: setattr( self, "%s_%s" % (term, param.replace("_ref", "")), torch.nn.Parameter( torch.ones_like( param_value.clone().detach(), ) * param_value.clone().detach().mean() ), ) def forward(self, g): update_dicts = {node: {} for node in self.g_ref.ntypes} for term in self.g_ref.ntypes: for param, param_value in self.g_ref.nodes[term].data.items(): if param.endswith("_ref"): if hasattr( self, "%s_%s" % (term, param.replace("_ref", "")) ): update_dicts[term][ param.replace("_ref", "") ] = getattr( self, "%s_%s" % (term, param.replace("_ref", "")), ) for node, update_dict in update_dicts.items(): for param, param_value in update_dict.items(): g.nodes[node].data[param] = param_value return g ================================================ FILE: espaloma/nn/layers/__init__.py ================================================ import espaloma.nn.layers.dgl_legacy ================================================ FILE: espaloma/nn/layers/dgl_legacy.py ================================================ """ Legacy models from DGL. 
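Wraps graph convolution layers from ``dgl.nn.pytorch.conv`` behind a uniform ``(in_features, out_features)`` constructor so they can be stacked by ``espaloma.nn.Sequential``; per-model default keyword arguments are supplied by ``DEFAULT_MODEL_KWARGS``.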
""" # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # CONSTANT # ============================================================================= DEFAULT_MODEL_KWARGS = { "SAGEConv": {"aggregator_type": "mean"}, "GATConv": {"num_heads": 4}, "TAGConv": {"k": 2}, } # ============================================================================= # MODULE CLASSES # ============================================================================= class GN(torch.nn.Module): def __init__( self, in_features, out_features, model_name="GraphConv", kwargs={}, ): super(GN, self).__init__() from dgl.nn import pytorch as dgl_pytorch if kwargs == {}: if model_name in DEFAULT_MODEL_KWARGS: kwargs = DEFAULT_MODEL_KWARGS[model_name] self.gn = getattr(dgl_pytorch.conv, model_name)( in_features, out_features, **kwargs ) # register these properties here for downstream handling self.in_features = in_features self.out_features = out_features def forward(self, g, x): return self.gn(g, x) # ============================================================================= # MODULE FUNCTIONS # ============================================================================= def gn(model_name="GraphConv", kwargs={}): from dgl.nn import pytorch as dgl_pytorch if model_name == "GINConv": return lambda in_features, out_features: dgl_pytorch.conv.GINConv( apply_func=torch.nn.Linear(in_features, out_features), aggregator_type="sum", ) else: return lambda in_features, out_features: GN( in_features=in_features, out_features=out_features, model_name=model_name, kwargs=kwargs, ) ================================================ FILE: espaloma/nn/readout/__init__.py ================================================ from . import janossy, graph_level_readout, node_typing, charge_equilibrium ================================================ FILE: espaloma/nn/readout/base_readout.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import abc import torch # ============================================================================= # BASE CLASSES # ============================================================================= class BaseReadout(abc.ABC, torch.nn.Module): """Base class for readout function.""" def __init__(self): super(BaseReadout, self).__init__() @abc.abstractmethod def forward(self, g, x=None, *args, **kwargs): raise NotImplementedError def _forward(self, g, x, *args, **kwargs): raise NotImplementedError ================================================ FILE: espaloma/nn/readout/charge_equilibrium.py ================================================ """ Charge equilibrium.ß """ # ============================================================================= # IMPORTS # ============================================================================= import torch # ============================================================================= # UTILITY FUNCTIONS # ============================================================================= def get_charges(node): """ Solve the function to get the absolute charges of atoms in a molecule from parameters. Parameters ---------- e : tf.Tensor, dtype = tf.float32, electronegativity. 
s : tf.Tensor, dtype = tf.float32, hardness. Q : tf.Tensor, dtype = tf.float32, shape=(), total charge of a molecule. We use Lagrange multipliers to analytically give the solution. $$ U({\bf q}) &= \sum_{i=1}^N \left[ e_i q_i + \frac{1}{2} s_i q_i^2\right] - \lambda \, \left( \sum_{j=1}^N q_j - Q \right) \\ &= \sum_{i=1}^N \left[ (e_i - \lambda) q_i + \frac{1}{2} s_i q_i^2 \right ] + Q $$ This gives us: $$ q_i^* &= - e_i s_i^{-1} + \lambda s_i^{-1} \\ &= - e_i s_i^{-1} + s_i^{-1} \frac{ Q + \sum\limits_{i=1}^N e_i \, s_i^{-1} }{\sum\limits_{j=1}^N s_j^{-1}} $$ """ e = node.data["e"] s = node.data["s"] sum_e_s_inv = node.data["sum_e_s_inv"] sum_s_inv = node.data["sum_s_inv"] sum_q = node.data["sum_q"] return { "q": -e * s**-1 + (s**-1) * torch.div(sum_q + sum_e_s_inv, sum_s_inv) } # ============================================================================= # MODULE CLASS # ============================================================================= class ChargeEquilibrium(torch.nn.Module): """Charge equilibrium within batches of molecules.""" def __init__(self): super(ChargeEquilibrium, self).__init__() def forward(self, g, total_charge=0.0): """apply charge equilibrium to all molecules in batch""" # calculate $s ^ {-1}$ and $ es ^ {-1}$ import dgl g.apply_nodes( lambda node: {"s_inv": node.data["s"] ** -1}, ntype="n1" ) g.apply_nodes( lambda node: {"e_s_inv": node.data["e"] * node.data["s"] ** -1}, ntype="n1", ) if "sum_q" not in g.nodes["g"].data: if "q_ref" in g.nodes["n1"].data: # get total charge g.update_all( dgl.function.copy_u(u="q_ref", out="m_q"), dgl.function.sum(msg="m_q", out="sum_q"), etype="n1_in_g", ) else: g.nodes["g"].data["sum_q"] = ( torch.ones( g.batch_size, 1, device=g.nodes["n1"].data["s"].device, ) * total_charge ) g.update_all( dgl.function.copy_u(u="sum_q", out="m_sum_q"), dgl.function.sum(msg="m_sum_q", out="sum_q"), etype="g_has_n1", ) # get the sum of $s^{-1}$ and $m_s^{-1}$ g.update_all( dgl.function.copy_u(u="s_inv", out="m_s_inv"), dgl.function.sum(msg="m_s_inv", out="sum_s_inv"), etype="n1_in_g", ) g.update_all( dgl.function.copy_u(u="e_s_inv", out="m_e_s_inv"), dgl.function.sum(msg="m_e_s_inv", out="sum_e_s_inv"), etype="n1_in_g", ) g.update_all( dgl.function.copy_u(u="sum_s_inv", out="m_sum_s_inv"), dgl.function.sum(msg="m_sum_s_inv", out="sum_s_inv"), etype="g_has_n1", ) g.update_all( dgl.function.copy_u(u="sum_e_s_inv", out="m_sum_e_s_inv"), dgl.function.sum(msg="m_sum_e_s_inv", out="sum_e_s_inv"), etype="g_has_n1", ) g.apply_nodes(get_charges, ntype="n1") return g ================================================ FILE: espaloma/nn/readout/graph_level_readout.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class GraphLevelReadout(torch.nn.Module): """Readout from graph level.""" def __init__( self, in_features, config_local, config_global, out_name, pool=None, ): super(GraphLevelReadout, self).__init__() import dgl if pool is None: pool = dgl.function.sum self.in_features = in_features self.config_local = config_local self.config_global = config_global self.d_local = esp.nn.sequential._Sequential( in_features=in_features, config=config_local, layer=torch.nn.Linear, ) 
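# the last integer entry in config_local is the output width of the local network; use it as the input width of the global network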
mid_features = [x for x in config_local if isinstance(x, int)][-1] self.d_global = esp.nn.sequential._Sequential( in_features=mid_features, config=config_global, layer=torch.nn.Linear, ) self.pool = pool self.out_name = out_name def forward(self, g): import dgl g.apply_nodes( lambda node: {"h_global": self.d_local(None, node.data["h"])}, ntype="n1", ) g.update_all( dgl.function.copy_u("h_global", "m"), self.pool("m", "h_global"), etype="n1_in_g", ) g.apply_nodes( lambda node: { self.out_name: self.d_global(None, node.data["h_global"]) }, ntype="g", ) return g ================================================ FILE: espaloma/nn/readout/janossy.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch import espaloma as esp # ============================================================================= # MODULE CLASSES # ============================================================================= class JanossyPooling(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for higher-order nodes. """ def __init__( self, config, in_features, out_features={ 1: ["sigma", "epsilon", "q"], 2: ["k", "eq"], 3: ["k", "eq"], 4: ["k", "eq"], }, out_features_dimensions=-1, pool=torch.add, ): super(JanossyPooling, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero for level in out_features.keys(): if isinstance(out_features[level], list): out_features[level] = dict( zip(out_features[level], [1 for _ in out_features[level]]) ) # bookkeeping self.out_features = out_features self.levels = [key for key in out_features.keys() if key != 1] self.pool = pool # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=in_features * level, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features[level].items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) if 1 not in self.out_features: return # atom level self.sequential_1 = esp.nn.sequential._Sequential( in_features=in_features, config=config, layer=torch.nn.Linear ) for feature, dimension in self.out_features[1].items(): setattr( self, "f_out_1_to_%s" % feature, torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
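Atom representations ``h`` are first copied onto every position of each higher-order node (``n2``, ``n3``, ``n4``); each term is then scored by passing the concatenated representations and their reverse through the per-level network and combining the two results with ``self.pool``, making the readout invariant to reversing a term's atom order.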
""" import dgl # copy g.multi_update_all( { "n1_as_%s_in_n%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(big_idx) }, cross_reducer="sum", ) # pool for big_idx in self.levels: if g.number_of_nodes("n%s" % big_idx) == 0: continue g.apply_nodes( func=lambda nodes: { feature: getattr( self, "f_out_%s_to_%s" % (big_idx, feature) )( self.pool( getattr(self, "sequential_%s" % big_idx)( None, torch.cat( [ nodes.data["h%s" % relationship_idx] for relationship_idx in range(big_idx) ], dim=1, ), ), getattr(self, "sequential_%s" % big_idx)( None, torch.cat( [ nodes.data["h%s" % relationship_idx] for relationship_idx in range( big_idx - 1, -1, -1 ) ], dim=1, ), ), ), ) for feature in self.out_features[big_idx].keys() }, ntype="n%s" % big_idx, ) if 1 not in self.out_features: return g # atom level g.apply_nodes( func=lambda nodes: { feature: getattr(self, "f_out_1_to_%s" % feature)( self.sequential_1(g=None, x=nodes.data["h"]) ) for feature in self.out_features[1].keys() }, ntype="n1", ) return g class JanossyPoolingImproper(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for improper torsions. """ def __init__( self, config, in_features, out_features={ "k": 2, }, out_features_dimensions=-1, ): super(JanossyPoolingImproper, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero # bookkeeping self.out_features = out_features self.levels = ["n4_improper"] # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=4 * in_features, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features.items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
""" import dgl # copy g.multi_update_all( { "n1_as_%s_in_%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(4) }, cross_reducer="sum", ) if g.number_of_nodes("n4_improper") == 0: return g # pool # sum over three cyclic permutations of "h0", "h2", "h3", assuming "h1" is the central atom in the improper # following the smirnoff trefoil convention [(0, 1, 2, 3), (2, 1, 3, 0), (3, 1, 0, 2)] # https://github.com/openff.toolkit/openff.toolkit/blob/166c9864de3455244bd80b2c24656bd7dda3ae2d/openff.toolkit/typing/engines/smirnoff/parameters.py#L3326-L3360 ## Set different permutations based on which definition of impropers ## are being used permuts = [(0, 1, 2, 3), (2, 1, 3, 0), (3, 1, 0, 2)] stack_permuts = lambda nodes, p: torch.cat( [nodes.data[f"h{i}"] for i in p], dim=1 ) for big_idx in self.levels: inner_net = getattr(self, f"sequential_{big_idx}") g.apply_nodes( func=lambda nodes: { feature: getattr(self, f"f_out_{big_idx}_to_{feature}")( torch.sum( torch.stack( [ inner_net( g=None, x=stack_permuts(nodes, p) ) for p in permuts ], dim=0, ), dim=0, ) ) for feature in self.out_features.keys() }, ntype=big_idx, ) return g class JanossyPoolingWithSmirnoffImproper(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for improper torsions. """ def __init__( self, config, in_features, out_features={ "k": 2, }, out_features_dimensions=-1, ): super(JanossyPoolingWithSmirnoffImproper, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero # bookkeeping self.out_features = out_features self.levels = ["n4_improper"] # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=4 * in_features, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features.items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
""" import dgl # copy g.multi_update_all( { "n1_as_%s_in_%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(4) }, cross_reducer="sum", ) if g.number_of_nodes("n4_improper") == 0: return g # pool # sum over three cyclic permutations of "h0", "h2", "h3", assuming "h1" is the central atom in the improper # following the smirnoff trefoil convention [(0, 1, 2, 3), (2, 1, 3, 0), (3, 1, 0, 2)] # https://github.com/openff.toolkit/openff.toolkit/blob/166c9864de3455244bd80b2c24656bd7dda3ae2d/openff.toolkit/typing/engines/smirnoff/parameters.py#L3326-L3360 ## Set different permutations based on which definition of impropers ## are being used permuts = [(0, 1, 2, 3), (0, 2, 3, 1), (0, 3, 1, 2)] stack_permuts = lambda nodes, p: torch.cat( [nodes.data[f"h{i}"] for i in p], dim=1 ) for big_idx in self.levels: inner_net = getattr(self, f"sequential_{big_idx}") g.apply_nodes( func=lambda nodes: { feature: getattr(self, f"f_out_{big_idx}_to_{feature}")( torch.sum( torch.stack( [ inner_net( g=None, x=stack_permuts(nodes, p) ) for p in permuts ], dim=0, ), dim=0, ) ) for feature in self.out_features.keys() }, ntype=big_idx, ) return g class JanossyPoolingNonbonded(torch.nn.Module): """Janossy pooling (arXiv:1811.01900) to average node representation for nonbonded interactions. """ def __init__( self, config, in_features, out_features={"sigma": 1, "epsilon": 1}, out_features_dimensions=-1, ): super(JanossyPoolingNonbonded, self).__init__() # if users specify out features as lists, # assume dimensions to be all zero # bookkeeping self.out_features = out_features self.levels = ["onefour", "nonbonded"] # get output features mid_features = [x for x in config if isinstance(x, int)][-1] # set up networks for level in self.levels: # set up individual sequential networks setattr( self, "sequential_%s" % level, esp.nn.sequential._Sequential( in_features=2 * in_features, config=config, layer=torch.nn.Linear, ), ) for feature, dimension in self.out_features.items(): setattr( self, "f_out_%s_to_%s" % (level, feature), torch.nn.Linear( mid_features, dimension, ), ) def forward(self, g): """Forward pass. Parameters ---------- g : dgl.DGLHeteroGraph, input graph. 
""" # copy g.multi_update_all( { "n1_as_%s_in_%s" % (relationship_idx, big_idx): ( dgl.function.copy_u("h", "m%s" % relationship_idx), dgl.function.mean( "m%s" % relationship_idx, "h%s" % relationship_idx ), ) for big_idx in self.levels for relationship_idx in range(2) }, cross_reducer="sum", ) for big_idx in self.levels: g.apply_nodes( func=lambda nodes: { feature: getattr( self, "f_out_%s_to_%s" % (big_idx, feature) )( torch.sum( torch.stack( [ getattr(self, "sequential_%s" % big_idx)( g=None, x=torch.cat( [ nodes.data["h0"], nodes.data["h1"], ], dim=1, ), ), getattr(self, "sequential_%s" % big_idx)( g=None, x=torch.cat( [ nodes.data["h1"], nodes.data["h0"], ], dim=1, ), ), ], dim=0, ), dim=0, ) ) for feature in self.out_features.keys() }, ntype=big_idx, ) return g class ExpCoefficients(torch.nn.Module): def forward(self, g): import math g.nodes["n2"].data["coefficients"] = ( g.nodes["n2"].data["log_coefficients"].exp() ) g.nodes["n3"].data["coefficients"] = ( g.nodes["n3"].data["log_coefficients"].exp() ) return g class LinearMixtureToOriginal(torch.nn.Module): def forward(self, g): import math ( g.nodes["n2"].data["k"], g.nodes["n2"].data["eq"], ) = esp.mm.functional.linear_mixture_to_original( g.nodes["n2"].data["coefficients"][:, 0][:, None], g.nodes["n2"].data["coefficients"][:, 1][:, None], 1.5, 6.0, ) ( g.nodes["n3"].data["k"], g.nodes["n3"].data["eq"], ) = esp.mm.functional.linear_mixture_to_original( g.nodes["n3"].data["coefficients"][:, 0][:, None], g.nodes["n3"].data["coefficients"][:, 1][:, None], 0.0, math.pi, ) g.nodes["n3"].data.pop("coefficients") g.nodes["n2"].data.pop("coefficients") return g ================================================ FILE: espaloma/nn/readout/node_typing.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= import torch from espaloma.nn.readout.base_readout import BaseReadout # ============================================================================= # MODULE CLASSES # ============================================================================= class NodeTyping(BaseReadout): """Simple typing on homograph.""" def __init__(self, in_features, n_classes=100): super(NodeTyping, self).__init__() self.c = torch.nn.Linear(in_features, n_classes) def forward(self, g): g.apply_nodes( ntype="n1", func=lambda node: {"nn_typing": self.c(node.data["h"])}, ) return g ================================================ FILE: espaloma/nn/sequential.py ================================================ """ Chain mutiple layers of GN together. 
""" import torch class _Sequential(torch.nn.Module): """Sequentially staggered neural networks.""" def __init__( self, layer, config, in_features, model_kwargs={}, ): super(_Sequential, self).__init__() self.exes = [] # init dim dim = in_features # parse the config for idx, exe in enumerate(config): try: exe = float(exe) if exe >= 1: exe = int(exe) except BaseException: pass # int -> feedfoward if isinstance(exe, int): setattr(self, "d" + str(idx), layer(dim, exe, **model_kwargs)) dim = exe self.exes.append("d" + str(idx)) # str -> activation elif isinstance(exe, str): if exe == "bn": setattr(self, "a" + str(idx), torch.nn.BatchNorm1d(dim)) else: activation = getattr(torch.nn.functional, exe) setattr(self, "a" + str(idx), activation) self.exes.append("a" + str(idx)) # float -> dropout elif isinstance(exe, float): dropout = torch.nn.Dropout(exe) setattr(self, "o" + str(idx), dropout) self.exes.append("o" + str(idx)) def forward(self, g, x): for exe in self.exes: if exe.startswith("d"): if g is not None: x = getattr(self, exe)(g, x) else: x = getattr(self, exe)(x) else: x = getattr(self, exe)(x) return x class Sequential(torch.nn.Module): """Sequential neural network with input layers. Parameters ---------- layer : torch.nn.Module DGL graph convolution layers. config : List A sequence of numbers (for units) and strings (for activation functions) denoting the configuration of the sequential model. feature_units : int(default=114) The number of input channels. Methods ------- forward(g, x) Forward pass. """ def __init__( self, layer, config, feature_units=114, input_units=128, model_kwargs={}, ): super(Sequential, self).__init__() # initial featurization self.f_in = torch.nn.Sequential( torch.nn.Linear(feature_units, input_units), torch.nn.Tanh() ) self._sequential = _Sequential( layer, config, in_features=input_units, model_kwargs=model_kwargs ) def _forward(self, g, x): """Forward pass with graph and features.""" for exe in self.exes: if exe.startswith("d"): x = getattr(self, exe)(g, x) else: x = getattr(self, exe)(x) return x def forward(self, g, x=None): """Forward pass. 
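Message passing runs on the homogeneous atom-level subgraph (``n1_neighbors_n1`` edges): input features ``h0`` are projected by ``f_in``, propagated through the graph convolution stack, and the result is written back to ``g.nodes['n1'].data['h']``.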
Parameters ---------- g : `dgl.DGLHeteroGraph`, input graph Returns ------- g : `dgl.DGLHeteroGraph` output graph """ import dgl # get homogeneous subgraph g_ = dgl.to_homogeneous(g.edge_type_subgraph(["n1_neighbors_n1"])) if x is None: # get node attributes x = g.nodes["n1"].data["h0"] x = self.f_in(x) # message passing on homo graph x = self._sequential(g_, x) # put attribute back in the graph g.nodes["n1"].data["h"] = x return g ================================================ FILE: espaloma/nn/tests/test_baseline.py ================================================ import pytest @pytest.fixture def baseline(): import espaloma as esp g = esp.Graph("c1ccccc1") # get force field forcefield = esp.graphs.legacy_force_field.LegacyForceField( "smirnoff99Frosst-1.1.0" ) # param / typing operation = forcefield.parametrize operation(g) baseline = esp.nn.baselines.FreeParameterBaseline(g_ref=g.heterograph) return baseline def test_init(baseline): baseline def test_parameter(baseline): print(list(baseline.parameters())) assert len(list(baseline.parameters())) > 0 ================================================ FILE: espaloma/nn/tests/test_janossy.py ================================================ import pytest def test_small_net(): import torch import espaloma as esp # define a layer layer = esp.nn.layers.dgl_legacy.gn("GraphConv") # define a representation representation = esp.nn.Sequential( layer, [32, "tanh", 32, "tanh", 32, "tanh"] ) # define a readout readout = esp.nn.readout.janossy.JanossyPooling( config=[32, "tanh"], in_features=32 ) net = torch.nn.Sequential(representation, readout) g = esp.Graph("c1ccccc1") ================================================ FILE: espaloma/nn/tests/test_simple_net.py ================================================ import pytest def test_small_net(): import torch import espaloma as esp layer = esp.nn.dgl_legacy.gn() net = esp.nn.Sequential(layer, [32, "tanh", 32, "tanh", 32, "tanh"]) ================================================ FILE: espaloma/units.py ================================================ # ============================================================================= # IMPORTS # ============================================================================= from openmm import unit # ============================================================================= # CONSTANTS # ============================================================================= # scaled units PARTICLE = unit.mole.create_unit( 6.02214076e23**-1, "particle", "particle", ) HARTREE_PER_PARTICLE = unit.hartree / PARTICLE # basic units DISTANCE_UNIT = unit.bohr ENERGY_UNIT = HARTREE_PER_PARTICLE FORCE_UNIT = ENERGY_UNIT / DISTANCE_UNIT ANGLE_UNIT = unit.radian CHARGE_UNIT = unit.elementary_charge # compose units FORCE_CONSTANT_UNIT = ENERGY_UNIT / (DISTANCE_UNIT**2) ANGLE_FORCE_CONSTANT_UNIT = ENERGY_UNIT / (ANGLE_UNIT**2) COULOMB_CONSTANT_UNIT = ( ENERGY_UNIT * DISTANCE_UNIT / ((unit.elementary_charge**2)) ) GAS_CONSTANT = ( 8.31446261815324 * unit.joule * (unit.kelvin**-1) * (unit.mole**-1) ).value_in_unit(HARTREE_PER_PARTICLE / unit.kelvin) ================================================ FILE: espaloma/utils/geometry.py ================================================ import numpy as np def _sample_unit_circle(n_samples: int = 1) -> np.ndarray: """ >>> np.isclose(np.linalg.norm(_sample_unit_circle(1)), 1) True """ theta = np.random.rand(n_samples) * 2 * np.pi x = np.cos(theta) y = np.sin(theta) xy = np.array([x, y]).T assert xy.shape == (n_samples, 2) return xy def 
_sample_four_particle_torsion_scan(n_samples: int = 1) -> np.ndarray: """Generate n_samples random configurations of a 4-particle system abcd where * distances ab, bc, cd are constant, * angles abc, bcd are constant * dihedral angle abcd is uniformly distributed in [0, 2pi] Returns ------- xyz : np.ndarray, shape = (n_samples, 4, 3) Notes ----- * Positions of a,b,c are constant, and x-coordinate of d is constant. To be more exacting, could add random displacements and rotations. """ a = (-3, -1, 0) b = (-2, 0, 0) c = (-1, 0, 0) d = (0, 1, 0) # form one 3D configuration conf = np.array([a, b, c, d]) assert conf.shape == (4, 3) # make n_samples copies xyz = np.array([conf] * n_samples, dtype=float) assert xyz.shape == (n_samples, 4, 3) # assign y and z coordinates of particle d to unit-circle samples xyz[:, 3, 1:] = _sample_unit_circle(n_samples) return xyz def _timemachine_signed_torsion_angle(ci, cj, ck, cl): """Reference implementation from Yutong Zhao's timemachine Copied directly from https://github.com/proteneer/timemachine/blob/1a0ab45e605dc1e28c44ea90f38cb0dedce5c4db/timemachine/potentials/bonded.py#L152-L199 (but with 3 lines of dead code removed, and delta_r inlined) """ rij = cj - ci rkj = cj - ck rkl = cl - ck n1 = np.cross(rij, rkj) n2 = np.cross(rkj, rkl) y = np.sum( np.multiply( np.cross(n1, n2), rkj / np.linalg.norm(rkj, axis=-1, keepdims=True), ), axis=-1, ) x = np.sum(np.multiply(n1, n2), -1) return np.arctan2(y, x) ================================================ FILE: espaloma/utils/model_fetch.py ================================================ from pathlib import Path from typing import Any, Union import requests import torch.utils.model_zoo from tqdm import tqdm def _get_model_url(version: str) -> str: """ Get the URL of the espaloma model from GitHub releases. Parameters: version (str): Version of the model. If set to "latest", the URL for the latest version will be returned. Returns: str: The URL of the espaloma model. Note: - If version is set to "latest", the URL for the latest version of the model will be returned. - The URL is obtained from the GitHub releases of the espaloma repository. Example: >>> url = _get_model_url(version="0.3.0") """ if version == "latest": url = "https://github.com/choderalab/espaloma/releases/latest/download/espaloma-latest.pt" else: # TODO: This scheme requires the version string of the model to match the # release version url = f"https://github.com/choderalab/espaloma/releases/download/{version}/espaloma-{version}.pt" return url def get_model_path( model_dir: Union[str, Path] = ".espaloma/", version: str = "latest", disable_progress_bar: bool = False, overwrite: bool = False, ) -> Path: """ Download a model for espaloma. Parameters: model_dir (str or Path): Directory path where the model will be saved. Default is ``.espaloma/``. version (str): Version of the model to download. Default is "latest". disable_progress_bar (bool): Whether to disable the progress bar during the download. Default is False. overwrite (bool): Whether to overwrite the existing model file if it exists. Default is False. Returns: Path: The path to the downloaded model file. Raises: FileExistsError: If the model file already exists and overwrite is set to False. Note: - If version is set to "latest", the latest version of the model will be downloaded. - The model will be downloaded from GitHub releases. - The model file will be saved in the specified model directory. 
Example: >>> model_path = get_model_path(model_dir=".espaloma/", version="0.3.0", disable_progress_bar=True) """ url = _get_model_url(version) # This will work as long as we never have a "/" in the version string file_name = Path(url.split("/")[-1]) model_dir = Path(model_dir) model_path = Path(model_dir / file_name) if not overwrite and model_path.exists(): raise FileExistsError( f"File '{model_path}' exists, use overwrite=True to overwrite file" ) model_dir.mkdir(parents=True, exist_ok=True) request = requests.get(url, stream=True) request_length = int(request.headers.get("content-length", 0)) with open(model_path, "wb") as file, tqdm( total=request_length, unit="iB", unit_scale=True, unit_divisor=1024, disable=disable_progress_bar, ) as progress: for data in request.iter_content(chunk_size=1024): size = file.write(data) progress.update(size) return model_path def get_model(version: str = "latest") -> dict[str, Any]: """ Load an espaloma model from GitHub releases. Parameters: version (str): Version of the model to load. Default is "latest". Returns: dict[str, Any]: The loaded espaloma model. Note: - If version is set to "latest", the latest version of the model will be loaded. - The model will be loaded from GitHub releases. - The model will be loaded onto the CPU. Example: >>> model = get_model(version="0.3.0") """ url = _get_model_url(version) model = torch.utils.model_zoo.load_url(url, map_location="cpu") model.eval() # type: ignore return model ================================================ FILE: espaloma/utils/tests/test_model_fetch.py ================================================ import espaloma as esp import torch from openff.toolkit.topology import Molecule def test_get_model_path(tmp_path): model_dir = tmp_path / "latest" model_path = esp.get_model_path(model_dir=model_dir, disable_progress_bar=True) molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") molecule_graph = esp.Graph(molecule) espaloma_model = torch.load(model_path) espaloma_model.eval() espaloma_model(molecule_graph.heterograph) def test_get_model(tmp_path): espaloma_model = esp.get_model() molecule = Molecule.from_smiles("CN1C=NC2=C1C(=O)N(C(=O)N2C)C") molecule_graph = esp.Graph(molecule) espaloma_model(molecule_graph.heterograph) ================================================ FILE: requirements.txt ================================================ dgl torch matplotlib pandas numpy qcportal ================================================ FILE: scripts/README.md ================================================ # Miscellaneous auxiliary scripts for demonstrating espaloma * `perses-benchmark/` - relative alchemical free energy calculations with [perses](http://github.com/choderalab/perses) using espaloma to parameterize small molecules via [`openmmforcefields`](https://github.com/openmm/openmmforcefields) ================================================ FILE: scripts/perses-benchmark/README.md ================================================ # Relative alchemical free energy calculations This is an example of using [perses](http://github.com/choderalab/perses) with espaloma to parameterize small molecules via [`openmmforcefields`](https://github.com/openmm/openmmforcefields) * `tyk2/` - JACS tyk2 system ## Installing perses and espaloma To install perses and espaloma together: ```bash conda env create -n espaloma-perses -f espaloma-perses.yaml ``` To reproduce the environment used in the paper (on linux-64): ```bash conda env create -n espaloma-perses -f espaloma-perses.export.yaml ```
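To sanity-check the installation (a minimal sketch, assuming the environment was created under the name `espaloma-perses` as above):
```bash
conda activate espaloma-perses
python -c "import espaloma, perses"
```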
================================================ FILE: scripts/perses-benchmark/espaloma-perses.export.yaml ================================================ name: espaloma-perses channels: - dglteam - psi4 - conda-forge - openeye - defaults dependencies: - _libgcc_mutex=0.1=conda_forge - _openmp_mutex=4.5=1_gnu - alabaster=0.7.12=py_0 - ambertools=21.9=py39h69e27f8_0 - argon2-cffi=21.3.0=pyhd8ed1ab_0 - argon2-cffi-bindings=21.2.0=py39h3811e60_1 - arpack=3.7.0=hdefa2d7_2 - arrow-cpp=2.0.0=py39h5894ca3_15_cpu - arsenic=0.2.1=py39hf3d152e_0 - asttokens=2.0.5=pyhd8ed1ab_0 - astunparse=1.6.3=pyhd8ed1ab_0 - attrs=21.4.0=pyhd8ed1ab_0 - aws-c-common=0.4.59=h36c2ea0_1 - aws-c-event-stream=0.1.6=had2084c_6 - aws-checksums=0.1.10=h4e93380_0 - aws-sdk-cpp=1.8.70=h57dc084_1 - babel=2.9.1=pyh44b312d_0 - backcall=0.2.0=pyh9f0ad1d_0 - backports=1.1=pyhd3eb1b0_0 - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 - beautifulsoup4=4.10.0=pyha770c72_0 - blas=1.0=mkl - bleach=5.0.0=pyhd8ed1ab_0 - blosc=1.21.0=h9c3ff4c_0 - bokeh=2.4.2=py39hf3d152e_0 - boost=1.74.0=py39h5472131_5 - boost-cpp=1.74.0=hc6e9bd1_3 - brotli=1.0.9=h166bdaf_7 - brotli-bin=1.0.9=h166bdaf_7 - brotlipy=0.7.0=py39hb9d737c_1004 - bzip2=1.0.8=h7f98852_4 - c-ares=1.18.1=h7f98852_0 - ca-certificates=2022.3.29=h06a4308_0 - cached-property=1.5.2=hd8ed1ab_1 - cached_property=1.5.2=pyha770c72_1 - cairo=1.16.0=h6cf1ce9_1008 - certifi=2021.10.8=py39hf3d152e_2 - cffi=1.15.0=py39h4bc2ebd_0 - cftime=1.6.0=py39hd257fcd_0 - charset-normalizer=2.0.12=pyhd8ed1ab_0 - click=8.1.2=py39hf3d152e_0 - cloudpickle=2.0.0=pyhd8ed1ab_0 - codecov=2.1.11=pyhd3deb0d_0 - colorama=0.4.4=pyh9f0ad1d_0 - coverage=6.3.2=py39hb9d737c_2 - cryptography=36.0.0=py39h9ce1e76_0 - cudatoolkit=10.2.89=h8f6ccaa_10 - curl=7.82.0=h2283fc2_0 - cycler=0.11.0=pyhd8ed1ab_0 - cython=0.29.28=py39h5a03fae_2 - cytoolz=0.11.2=py39hb9d737c_2 - dask=2022.4.0=pyhd8ed1ab_0 - dask-core=2022.4.0=pyhd8ed1ab_0 - dask-jobqueue=0.7.3=pyhd8ed1ab_0 - debugpy=1.5.1=py39he80948d_0 - decorator=5.1.1=pyhd8ed1ab_0 - defusedxml=0.7.1=pyhd8ed1ab_0 - dgl=0.8.0post2=py39_0 - dicttoxml=1.7.4=pyhd8ed1ab_2 - distributed=2022.4.0=pyhd8ed1ab_0 - docutils=0.17.1=py39hf3d152e_1 - entrypoints=0.4=pyhd8ed1ab_0 - executing=0.8.3=pyhd8ed1ab_0 - expat=2.4.8=h27087fc_0 - fftw=3.3.10=nompi_h77c792f_102 - fire=0.4.0=pyh44b312d_0 - flit-core=3.7.1=pyhd8ed1ab_0 - fontconfig=2.14.0=h8e229c2_0 - freetype=2.11.0=h70c0345_0 - fsspec=2022.3.0=pyhd8ed1ab_0 - future=0.18.2=py39hf3d152e_5 - gettext=0.21.0=hf68c758_0 - gflags=2.2.2=he1b5a44_1004 - giflib=5.2.1=h516909a_2 - glog=0.4.0=h49b9bf7_3 - greenlet=1.1.2=py39h5a03fae_2 - grpc-cpp=1.34.1=h2157cd5_4 - h5py=3.6.0=nompi_py39h7e08c79_100 - hdf4=4.2.15=h10796ff_3 - hdf5=1.12.1=nompi_h4df4325_104 - heapdict=1.0.1=py_0 - icu=68.2=h9c3ff4c_0 - idna=3.3=pyhd8ed1ab_0 - imagesize=1.3.0=pyhd8ed1ab_0 - importlib-metadata=4.11.3=py39hf3d152e_1 - importlib_resources=5.6.0=pyhd8ed1ab_0 - iniconfig=1.1.1=pyh9f0ad1d_0 - intel-openmp=2021.4.0=h06a4308_3561 - ipykernel=6.12.0=py39hef51801_0 - ipython=8.2.0=py39hf3d152e_0 - ipython_genutils=0.2.0=py_1 - ipywidgets=7.7.0=pyhd8ed1ab_0 - jedi=0.18.1=py39hf3d152e_1 - jinja2=3.1.1=pyhd8ed1ab_0 - joblib=1.1.0=pyhd8ed1ab_0 - jpeg=9e=h7f98852_0 - jsonschema=4.4.0=pyhd8ed1ab_0 - jupyter_client=7.2.2=pyhd8ed1ab_1 - jupyter_core=4.9.2=py39hf3d152e_0 - jupyterlab_pygments=0.1.2=pyh9f0ad1d_0 - jupyterlab_widgets=1.1.0=pyhd8ed1ab_0 - keyutils=1.6.1=h166bdaf_0 - kiwisolver=1.4.2=py39hf939315_1 - krb5=1.19.3=h08a2579_0 - lcms2=2.12=hddcbb42_0 - 
ld_impl_linux-64=2.36.1=hea4e1c9_2 - libblas=3.9.0=12_linux64_mkl - libbrotlicommon=1.0.9=h166bdaf_7 - libbrotlidec=1.0.9=h166bdaf_7 - libbrotlienc=1.0.9=h166bdaf_7 - libcblas=3.9.0=12_linux64_mkl - libcurl=7.82.0=h2283fc2_0 - libedit=3.1.20210910=h7f8727e_0 - libev=4.33=h516909a_1 - libevent=2.1.10=h28343ad_4 - libffi=3.4.2=h7f98852_5 - libgcc-ng=11.2.0=h1d223b6_15 - libgfortran-ng=11.2.0=h69a702a_15 - libgfortran5=11.2.0=h5c6108e_15 - libglib=2.70.2=h174f98d_4 - libgomp=11.2.0=h1d223b6_15 - libiconv=1.16=h516909a_0 - liblapack=3.9.0=12_linux64_mkl - libllvm10=10.0.1=he513fc3_3 - libnetcdf=4.8.1=nompi_hb3fd0d9_101 - libnghttp2=1.47.0=he49606f_0 - libnsl=2.0.0=h7f98852_0 - libpng=1.6.37=hed695b0_2 - libprotobuf=3.14.0=h780b84a_0 - libsodium=1.0.18=h516909a_1 - libssh2=1.10.0=ha35d2d1_2 - libstdcxx-ng=11.2.0=he4da1e4_15 - libthrift=0.13.0=hfb8234f_6 - libtiff=4.2.0=hbd63e13_2 - libutf8proc=2.7.0=h7f98852_0 - libuuid=2.32.1=h14c3975_1000 - libwebp=1.2.2=h55f646e_0 - libwebp-base=1.2.2=h7f98852_1 - libxcb=1.14=h7b6447c_0 - libxml2=2.9.12=h72842e0_0 - libxslt=1.1.33=h15afd5d_2 - libzip=1.8.0=h1c5bbd1_1 - libzlib=1.2.11=h166bdaf_1014 - llvmlite=0.36.0=py39h1bbdace_0 - locket=0.2.1=py39h06a4308_2 - lxml=4.8.0=py39hb9d737c_1 - lz4=4.0.0=py39h029007f_1 - lz4-c=1.9.3=h9c3ff4c_1 - lzo=2.10=h516909a_1000 - markupsafe=2.1.1=py39hb9d737c_1 - matplotlib=3.3.2=0 - matplotlib-base=3.3.2=py39h98787fa_1 - matplotlib-inline=0.1.3=pyhd8ed1ab_0 - mdtraj=1.9.7=py39h138c130_1 - mistune=0.8.4=py39h3811e60_1005 - mkl=2021.4.0=h06a4308_640 - mkl-service=2.4.0=py39h3811e60_0 - mpiplus=v0.0.1=py39hde42818_1002 - msgpack-python=1.0.3=py39hf939315_1 - nbclient=0.5.13=pyhd8ed1ab_0 - nbconvert=6.4.5=py39hf3d152e_0 - nbformat=5.3.0=pyhd8ed1ab_0 - ncurses=6.3=h27087fc_1 - nest-asyncio=1.5.5=pyhd8ed1ab_0 - netcdf-fortran=4.5.4=nompi_h2b6e579_100 - netcdf4=1.5.8=nompi_py39h64b754b_101 - networkx=2.7.1=pyhd8ed1ab_0 - nglview=3.0.3=pyh8a188c0_0 - ninja=1.10.2=h4bd325d_1 - nose=1.3.7=py_1006 - nose-timer=1.0.1=pyhd8ed1ab_0 - notebook=6.4.10=pyha770c72_0 - numba=0.53.1=py39h56b8d98_1 - numexpr=2.8.1=py39h6abb31d_0 - numpy=1.22.3=py39h18676bf_1 - numpydoc=1.2.1=pyhd8ed1ab_0 - ocl-icd=2.3.1=h7f98852_0 - ocl-icd-system=1.0.0=1 - openeye-toolkits=2021.2.0=py39_0 - openff-forcefields=2.0.0=pyh6c4a22f_0 - openff-toolkit=0.10.3=pyhd8ed1ab_0 - openff-toolkit-base=0.10.3=pyhd8ed1ab_0 - openmm=7.7.0=py39h9717219_1 - openmmtools=0.21.2=pyhd8ed1ab_0 - openmoltools=0.8.8=pyhd8ed1ab_1 - openssl=3.0.2=h166bdaf_1 - orc=1.6.6=h7950760_1 - packaging=21.3=pyhd8ed1ab_0 - packmol=20.010=h86c2bf4_0 - pandas=1.4.2=py39h1832856_0 - pandoc=2.17.1.1=ha770c72_0 - pandocfilters=1.5.0=pyhd8ed1ab_0 - parmed=3.4.3=py39he80948d_1 - parquet-cpp=1.5.1=1 - parso=0.8.3=pyhd8ed1ab_0 - partd=1.2.0=pyhd8ed1ab_0 - patsy=0.5.2=pyhd8ed1ab_0 - pcre=8.45=h9c3ff4c_0 - pdbfixer=1.8.1=pyh6c4a22f_0 - perl=5.32.1=2_h7f98852_perl5 - perses=0.9.5=pyh8a188c0_0 - pexpect=4.8.0=pyh9f0ad1d_2 - pickleshare=0.7.5=py39hde42818_1002 - pillow=9.0.1=py39h22f2fdc_0 - pint=0.19.1=pyhd8ed1ab_0 - pip=22.0.4=pyhd8ed1ab_0 - pixman=0.40.0=h36c2ea0_0 - plotly=5.7.0=pyhd8ed1ab_0 - pluggy=1.0.0=py39hf3d152e_3 - prometheus_client=0.14.0=pyhd8ed1ab_0 - prompt-toolkit=3.0.29=pyha770c72_0 - psutil=5.9.0=py39hb9d737c_1 - ptyprocess=0.7.0=pyhd3deb0d_0 - pure_eval=0.2.2=pyhd8ed1ab_0 - py=1.11.0=pyh6c4a22f_0 - pyarrow=2.0.0=py39h3ebc44c_15_cpu - pycairo=1.21.0=py39h0934665_1 - pycparser=2.21=pyhd8ed1ab_0 - pydantic=1.9.0=py39hb9d737c_1 - pygments=2.11.2=pyhd8ed1ab_0 - pymbar=3.0.6=py39hd257fcd_0 - 
pyopenssl=22.0.0=pyhd8ed1ab_0 - pyparsing=3.0.7=pyhd8ed1ab_0 - pyrsistent=0.18.1=py39hb9d737c_1 - pysocks=1.7.1=py39hf3d152e_5 - pytables=3.7.0=py39h2669a42_0 - pytest=7.1.1=py39hf3d152e_1 - pytest-cov=3.0.0=pyhd8ed1ab_0 - python=3.9.12=h2660328_1_cpython - python-dateutil=2.8.2=pyhd8ed1ab_0 - python-fastjsonschema=2.15.3=pyhd8ed1ab_0 - python_abi=3.9=2_cp39 - pytorch=1.10.2=cpu_py39hfa7516b_0 - pytz=2022.1=pyhd8ed1ab_0 - pyyaml=6.0=py39hb9d737c_4 - pyzmq=22.3.0=py39headdf64_2 - qcelemental=0.24.0=pyhd8ed1ab_0 - qcportal=0.15.8=pyhd8ed1ab_0 - rdkit=2022.03.1=py39h89e00b9_0 - re2=2020.11.01=h58526e2_0 - readline=8.1.2=h7f8727e_1 - reportlab=3.5.68=py39he59360d_1 - requests=2.27.1=pyhd8ed1ab_0 - scikit-learn=1.0.2=py39h4dfa638_0 - scipy=1.8.0=py39hee8e79c_1 - seaborn=0.11.2=hd8ed1ab_0 - seaborn-base=0.11.2=pyhd8ed1ab_0 - send2trash=1.8.0=pyhd8ed1ab_0 - setuptools=62.0.0=py39hf3d152e_0 - six=1.16.0=pyh6c4a22f_0 - smirnoff99frosst=1.1.0=pyh44b312d_0 - snappy=1.1.8=he1b5a44_3 - snowballstemmer=2.2.0=pyhd8ed1ab_0 - sortedcontainers=2.4.0=pyhd8ed1ab_0 - soupsieve=2.3.1=pyhd8ed1ab_0 - sphinx=4.5.0=pyh6c4a22f_0 - sphinx_rtd_theme=1.0.0=pyhd8ed1ab_0 - sphinxcontrib-applehelp=1.0.2=py_0 - sphinxcontrib-devhelp=1.0.2=py_0 - sphinxcontrib-htmlhelp=2.0.0=pyhd8ed1ab_0 - sphinxcontrib-jsmath=1.0.1=py_0 - sphinxcontrib-qthelp=1.0.3=py_0 - sphinxcontrib-serializinghtml=1.1.5=pyhd8ed1ab_1 - sqlalchemy=1.4.35=py39hb9d737c_0 - sqlite=3.38.2=hc218d9a_0 - stack_data=0.2.0=pyhd8ed1ab_0 - statsmodels=0.13.2=py39hce5d2b2_0 - tblib=1.7.0=pyhd8ed1ab_0 - tenacity=8.0.1=pyhd8ed1ab_0 - termcolor=1.1.0=py_2 - terminado=0.13.3=py39hf3d152e_1 - testpath=0.6.0=pyhd8ed1ab_0 - threadpoolctl=3.1.0=pyh8a188c0_0 - tinydb=4.7.0=pyhd8ed1ab_0 - tk=8.6.12=h27826a3_0 - toml=0.10.2=pyhd8ed1ab_0 - tomli=2.0.1=pyhd8ed1ab_0 - toolz=0.11.2=pyhd8ed1ab_0 - tornado=6.1=py39hb9d737c_3 - tqdm=4.64.0=pyhd8ed1ab_0 - traitlets=5.1.1=pyhd8ed1ab_0 - typing-extensions=4.1.1=hd8ed1ab_0 - typing_extensions=4.1.1=pyha770c72_0 - tzdata=2022a=h191b570_0 - urllib3=1.26.9=pyhd8ed1ab_0 - validators=0.18.2=pyhd3deb0d_0 - wcwidth=0.2.5=pyh9f0ad1d_2 - webencodings=0.5.1=py_1 - wheel=0.37.1=pyhd8ed1ab_0 - widgetsnbextension=3.6.0=py39hf3d152e_0 - xmltodict=0.12.0=py_0 - xorg-kbproto=1.0.7=h14c3975_1002 - xorg-libice=1.0.10=h516909a_0 - xorg-libsm=1.2.3=hd9c2040_1000 - xorg-libx11=1.7.2=h7f98852_0 - xorg-libxext=1.3.4=h7f98852_1 - xorg-libxrender=0.9.10=h7f98852_1003 - xorg-libxt=1.2.1=h7f98852_2 - xorg-renderproto=0.11.1=h14c3975_1002 - xorg-xextproto=7.3.0=h14c3975_1002 - xorg-xproto=7.0.31=h14c3975_1007 - xz=5.2.5=h516909a_1 - yaml=0.2.5=h7f98852_2 - zeromq=4.3.4=h9c3ff4c_1 - zict=2.1.0=pyhd8ed1ab_0 - zipp=3.8.0=pyhd8ed1ab_0 - zlib=1.2.11=h166bdaf_1014 - zstd=1.4.9=ha95c52a_0 - pip: - amberlite==16.0 - amberutils==21.0 - espaloma==0.2.2 - mmpbsa-py==16.0 - openmmforcefields==0.10.0+27.g1fabf43 - packmol-memgen==1.2.1rc0 - pdb4amber==20.1 - pytraj==2.0.6 - sander==16.0 prefix: /lila/home/chodera/miniconda/envs/espaloma-perses ================================================ FILE: scripts/perses-benchmark/espaloma-perses.yaml ================================================ name: espaloma-perses channels: - conda-forge - dglteam - openeye - defaults - anaconda dependencies: # Base dependencies - python - pip # 3rd party - openeye-toolkits - numpy - matplotlib - scipy - openff-toolkit - openff-forcefields - smirnoff99Frosst - openmm - openmmforcefields - tqdm # Pytorch - pytorch>=1.8.0 - dgl # Testing - pytest - pytest-cov - codecov - nose - nose-timer - 
coverage - qcportal>=0.15.0 - sphinx - sphinx_rtd_theme # perses - perses # will be added to openmmforcefields conda-forge recipe - validators - pip: # espaloma - git+https://github.com/choderalab/espaloma.git@0.2.2 # openmmforcefield - git+https://github.com/openmm/openmmforcefields.git ================================================ FILE: scripts/perses-benchmark/tyk2/README.md ================================================ # tyk2 benchmarks with perses and espaloma * `openff-1.2.0/` - scripts to use the Open Force Field ("Parsley") `openff-1.2.0` small molecule force field * `espaloma-0.2.2/` - scripts to use the Espaloma `espaloma-0.2.2` small molecule force field ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/LSF-job-template.sh ================================================ #!/bin/bash #BSUB -P "tyk2-benchmark" #BSUB -J "perses-benchmark-[1-24]" #BSUB -n 1 #BSUB -R rusage[mem=8] #BSUB -R span[hosts=1] #BSUB -q gpuqueue #BSUB -sp 1 # low priority. default is 12, max is 25 #BSUB -gpu num=1:j_exclusive=yes:mode=shared #BSUB -W 24:00 #BSUB -o out_%J_%I.stdout #BSUB -eo out_%J_%I.stderr #BSUB -L /bin/bash source ~/.bashrc OPENMM_CPU_THREADS=1 echo "changing directory to ${LS_SUBCWD}" cd $LS_SUBCWD conda activate espaloma-perses # Report node in use hostname # Report CUDA info env | sort | grep 'CUDA' # launching a benchmark pair (target, edge) per job (0-based, thus subtract 1) python run_benchmarks.py --target tyk2 --edge $(( $LSB_JOBINDEX - 1 )) ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/README.md ================================================ # Perses benchmarks This subdirectory exposes a CLI tool for running automated benchmarks from [OpenFF's protein ligand benchmark dataset](https://github.com/openforcefield/protein-ligand-benchmark) using perses. ## Running all edges A script to run all transformations in an LSF batch scheduler is provided, but it will likely need to be modified for your batch queue system: ```bash bsub < LSF-job-template.sh ``` ## Running single edges Assuming you have a clone of the perses code repository and are standing in the `benchmarks` subdirectory (where this file lives), the benchmarks can be run using the following command syntax: ```bash python run_benchmarks.py --target [protein-name] --edge [edge-index] ``` For example, to run the seventh edge (zero-based, according to the [plbenchmark data](https://github.com/openforcefield/protein-ligand-benchmark)) for the `tyk2` protein, you would run: ```bash # Set up and run edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` Should the calculation for an edge fail, you can simply re-run the same command line and the calculation will resume: ```bash # Resume failed edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` For more information on how to use the tool, you can run `python run_benchmarks.py -h`. ## Analyzing benchmarks To analyze the simulations, a script called `benchmark_analysis.py` is used as follows: ```bash python benchmark_analysis.py --target [protein-name] ``` For example, for tyk2 results: ```bash python benchmark_analysis.py --target tyk2 ``` This will generate an output CSV file for [`arsenic`](https://github.com/openforcefield/arsenic) and corresponding absolute and relative free energy plots as PDF files, produced according to best practices. For more information on how to use the CLI analysis tool, run `python benchmark_analysis.py -h`.
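For reference, the experimental ΔG values written to the arsenic CSV are derived from the Ki measurements in the benchmark's `ligands.yml`. A minimal sketch of the conversion `benchmark_analysis.py` applies (the 0.096 uM Ki below is illustrative, taken from the `lig_ejm_31` example entry in the script's docstring):

```python
# Convert an experimental Ki to a binding free energy in kcal/mol,
# mirroring the conversion in benchmark_analysis.py.
import numpy as np
from openmm import unit
from openmmtools.constants import kB

kBT = kB * 300 * unit.kelvin   # thermal energy at 300 K
ki_in_molar = 0.096 * 1e-6     # 0.096 uM expressed in molar units
expt_DG = kBT.value_in_unit(unit.kilocalorie_per_mole) * np.log(ki_in_molar)
print(f"expt_DG = {expt_DG:.2f} kcal/mol")  # approximately -9.6 kcal/mol
```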
================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/benchmark_analysis.py ================================================ """ Script to perform analysis of perses simulations executed using the run_benchmarks.py script. Intended to be used on systems from https://github.com/openforcefield/protein-ligand-benchmark """ import argparse import glob import itertools import re import warnings import numpy as np import urllib.request import yaml from openmmtools.constants import kB from perses.analysis.load_simulations import Simulation from openmm import unit from openff.arsenic import plotting, wrangle # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" # Helper functions def get_simdir_list(base_dir='.', is_reversed=False): """ Get list of directories to extract simulation data from. Parameters ---------- base_dir: str, optional, default='.' Base directory where to search for simulation results. Defaults to the current directory. is_reversed: bool, optional, default=False Whether to consider the reversed simulations or not. Meant for testing purposes. Returns ------- dir_list: list List of directory paths for simulation results. """ # Load all expected simulations from directories out_dirs = ['/'.join(filepath.split('/')[:-1]) for filepath in glob.glob(f'{base_dir}/out*/*complex.nc')] reg = re.compile(r'out_[0-9]+_[0-9]+_reversed') # regular expression to deal with reversed directories if is_reversed: # Choose only reversed directories out_dirs = list(filter(reg.search, out_dirs)) else: # Filter out reversed directories out_dirs = list(itertools.filterfalse(reg.search, out_dirs)) return out_dirs def get_simulations_data(simulation_dirs): """Generates a list of simulation data objects given the simulation directory paths.""" simulations = [] for out_dir in simulation_dirs: # Load complete or fully working simulations # TODO: Try getting better exceptions from openmmtools -- use non-generic exceptions try: simulation = Simulation(out_dir) simulations.append(simulation) except Exception: warnings.warn(f"Edge in {out_dir} could not be loaded. Check simulation output is complete.") return simulations def to_arsenic_csv(experimental_data: dict, simulation_data: list, out_csv: str = 'out_benchmark.csv'): """ Generates a csv file to be used with openff-arsenic. Energy units are in kcal/mol. .. warning:: To be deprecated once the arsenic object model is improved. Parameters ---------- experimental_data: dict Python nested dictionary with experimental data in micromolar or nanomolar units. Example of entry: {'lig_ejm_31': {'measurement': {'comment': 'Table 4, entry 31', 'doi': '10.1016/j.ejmech.2013.03.070', 'error': -1, 'type': 'ki', 'unit': 'uM', 'value': 0.096}, 'name': 'lig_ejm_31', 'smiles': '[H]c1c(c(c(c(c1[H])Cl)C(=O)N([H])c2c(c(nc(c2[H])N([H])C(=O)C([H])([H])[H])[H])[H])Cl)[H]'} simulation_data: list or iterable Python iterable object with perses Simulation objects as entries. out_csv: str Path to output csv file to be generated.
""" # Ligand information ligands_names = list(ligands_dict.keys()) lig_id_to_name = dict(enumerate(ligands_names)) kBT = kB * 300 * unit.kelvin # useful when converting to kcal/mol # Write csv file with open(out_csv, 'w') as csv_file: # Experimental block # print header for block csv_file.write("# Experimental block\n") csv_file.write("# Ligand, expt_DG, expt_dDG\n") # Extract ligand name, expt_DG and expt_dDG from ligands dictionary for ligand_name, ligand_data in experimental_data.items(): # TODO: Handle multiple measurement types unit_symbol = ligand_data['measurement']['unit'] measurement_value = ligand_data['measurement']['value'] measurement_error = ligand_data['measurement']['error'] # Unit conversion # TODO: Let's persuade PLBenchmarks to use pint units unit_conversions = { 'M' : 1.0, 'mM' : 1e-3, 'uM' : 1e-6, 'nM' : 1e-9, 'pM' : 1e-12, 'fM' : 1e-15 } if unit_symbol not in unit_conversions: raise ValueError(f'Unknown units "{unit_symbol}"') value_to_molar= unit_conversions[unit_symbol] # Handle unknown errors # TODO: We should be able to ensure that all entries have more reasonable errors. if measurement_error == -1: # TODO: For now, we use a relative_error from the Tyk2 system 10.1016/j.ejmech.2013.03.070 relative_error = 0.3 else: relative_error = measurement_error / measurement_value # Convert to free eneriges expt_DG = kBT.value_in_unit(unit.kilocalorie_per_mole) * np.log(measurement_value * value_to_molar) expt_dDG = kBT.value_in_unit(unit.kilocalorie_per_mole) * relative_error csv_file.write(f"{ligand_name}, {expt_DG}, {expt_dDG}\n") # Calculated block # print header for block csv_file.write("# Calculated block\n") csv_file.write("# Ligand1,Ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional)\n") # Loop through simulation, extract ligand1 and ligand2 indices, convert to names, create string with # ligand1, ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional) # write string in csv file for simulation in simulation_data: out_dir = simulation.directory.split('/')[-1] # getting integer indices ligand1_id, ligand2_id = int(out_dir.split('_')[-1]), int(out_dir.split('_')[-2]) # CHECK ORDER! # getting names of ligands ligand1, ligand2 = lig_id_to_name[ligand1_id], lig_id_to_name[ligand2_id] # getting calc_DDG in kcal/mol calc_DDG = simulation.bindingdg.value_in_unit(unit.kilocalorie_per_mole) # getting calc_dDDG in kcal/mol calc_dDDG = simulation.bindingddg.value_in_unit(unit.kilocalorie_per_mole) csv_file.write( f"{ligand1}, {ligand2}, {calc_DDG}, {calc_dDDG}, 0.0\n") # hardcoding additional error as 0.0 # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with urllib.request.urlopen(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks analysis.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Analyze reversed edge simulations. Helpful for testing/consistency checks." 
) args = arg_parser.parse_args() target = args.target # Download experimental data # TODO: This part should be done using plbenchmarks API - once there is a conda pkg # TODO: Let's cache this data when we set up the initial simulations in case it changes in between setting up and running the calculations and analysis. # TODO: Let's also be sure to use a specific release tag rather than 'master' target_dir = targets_dict[target]['dir'] ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with urllib.request.urlopen(ligands_url) as response: yaml_contents = response.read() print(yaml_contents) ligands_dict = yaml.safe_load(yaml_contents) # DEBUG print('') print(yaml.dump(ligands_dict)) # Get paths for simulation output directories out_dirs = get_simdir_list(is_reversed=args.reversed) # Generate list with simulation objects simulations = get_simulations_data(out_dirs) # Generate csv file csv_path = f'./{target}_arsenic.csv' to_arsenic_csv(ligands_dict, simulations, out_csv=csv_path) # TODO: Separate plotting into a different file # Make plots and store fe = wrangle.FEMap(csv_path) # Relative plot plotting.plot_DDGs(fe.graph, target_name=f'{target}', title=f'Relative binding energies - {target}', figsize=5, filename='./plot_relative.pdf' ) # Absolute plot, with experimental data shifted to correct mean experimental_mean_dg = np.asarray([node[1]["exp_DG"] for node in fe.graph.nodes(data=True)]).mean() plotting.plot_DGs(fe.graph, target_name=f'{target}', title=f'Absolute binding energies - {target}', figsize=5, filename='./plot_absolute.pdf', shift=experimental_mean_dg, ) ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/run_benchmarks.py ================================================ #!/usr/bin/env python """ CLI utility to automatically run benchmarks using data from the open force field protein-ligand benchmark at https://github.com/openforcefield/protein-ligand-benchmark It requires an internet connection to function properly, since it fetches data from the repository mentioned above. """ # TODO: Use plbenchmarks when conda package is available. import argparse import logging import os import yaml from perses.app.setup_relative_calculation import run from perses.utils.url_utils import retrieve_file_url from perses.utils.url_utils import fetch_url_contents # Setting logging level config LOGLEVEL = os.environ.get("LOGLEVEL", "DEBUG").upper() logging.basicConfig( format='%(asctime)s %(levelname)-8s %(message)s', level=LOGLEVEL, datefmt='%Y-%m-%d %H:%M:%S') _logger = logging.getLogger() _logger.setLevel(LOGLEVEL) # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" def concatenate_files(input_files, output_file): """ Concatenate the files given in the input_files iterator into output_file. """ with open(output_file, 'w') as outfile: for filename in input_files: with open(filename) as infile: for line in infile: outfile.write(line) def run_relative_perturbation(lig_a_idx, lig_b_idx, reverse=False, tidy=True): """ Perform relative free energy simulation using the perses CLI. Parameters ---------- lig_a_idx : int Index for first ligand (ligand A) lig_b_idx : int Index for second ligand (ligand B) reverse: bool Run the edge in the reverse direction. Swaps the ligands. tidy : bool, optional Remove auto-generated yaml files. Expects the target/protein pdb file in the same directory to be called 'target.pdb', and the ligands file to be called 'ligands.sdf'.
""" _logger.info(f'Starting relative calculation of ligand {lig_a_idx} to {lig_b_idx}') trajectory_directory = f'out_{lig_a_idx}_{lig_b_idx}' new_yaml = f'relative_{lig_a_idx}_{lig_b_idx}.yaml' # read base template yaml file # TODO: template.yaml file is configured for Tyk2, check if the same options work for others. with open(f'template.yaml', "r") as yaml_file: options = yaml.load(yaml_file, Loader=yaml.FullLoader) # TODO: add a step to perform some minimization - should help with NaNs # generate yaml file from template options['protein_pdb'] = 'target.pdb' options['ligand_file'] = 'ligands.sdf' if reverse: # Do the other direction of ligands options['old_ligand_index'] = lig_b_idx options['new_ligand_index'] = lig_a_idx # mark the output directory with reversed trajectory_directory = f'{trajectory_directory}_reversed' # mark new yaml file with reversed temp_path = new_yaml.split('.') new_yaml = f'{temp_path[0]}_reversed.{temp_path[1]}' else: options['old_ligand_index'] = lig_a_idx options['new_ligand_index'] = lig_b_idx options['trajectory_directory'] = f'{trajectory_directory}' with open(new_yaml, 'w') as outfile: yaml.dump(options, outfile) # run the simulation - using API point to respect logging level run(new_yaml) _logger.info(f'Relative calculation of ligand {lig_a_idx} to {lig_b_idx} complete') if tidy: os.remove(new_yaml) # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with fetch_url_contents(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--edge", type=int, help="Edge index (0-based) according to edges yaml file in dataset. Ex. --edge 5 (for sixth edge)", required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Whether to run the edge in reverse direction. Helpful for consistency checks." 
) args = arg_parser.parse_args() target = args.target is_reversed = args.reversed # Fetch protein pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg target_dir = targets_dict[target]['dir'] pdb_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/protein.pdb" pdb_file = retrieve_file_url(pdb_url) # Fetch cofactors crystalwater pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg cofactors_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/cofactors_crystalwater.pdb" cofactors_file = retrieve_file_url(cofactors_url) # Concatenate protein with cofactors pdbs concatenate_files((pdb_file, cofactors_file), 'target.pdb') # Fetch ligands sdf files and concatenate them into one # TODO: This part should be done using plbenchmarks API - once there is a conda pkg ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with fetch_url_contents(ligands_url) as response: ligands_dict = yaml.safe_load(response.read()) ligand_files = [] for ligand in ligands_dict.keys(): ligand_url = f"{base_repo_url}/raw/master/data/{target_dir}/02_ligands/{ligand}/crd/{ligand}.sdf" ligand_file = retrieve_file_url(ligand_url) ligand_files.append(ligand_file) # concatenate sdfs concatenate_files(ligand_files, 'ligands.sdf') # run simulation # fetch edges information # TODO: This part should be done using plbenchmarks API - once there is a conda pkg edges_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/edges.yml" with fetch_url_contents(edges_url) as response: edges_dict = yaml.safe_load(response.read()) edges_list = list(edges_dict.values()) # subscriptable edges object - note dicts are ordered for py>=3.7 # edge list to access by index edge_index = args.edge # read from cli arguments edge = edges_list[edge_index] ligand_a_name = edge['ligand_a'] ligand_b_name = edge['ligand_b'] # ligands list to get indices -- preserving same order as upstream yaml file ligands_list = list(ligands_dict.keys()) lig_a_index = ligands_list.index(ligand_a_name) lig_b_index = ligands_list.index(ligand_b_name) # Perform the simulation run_relative_perturbation(lig_a_index, lig_b_index, reverse=is_reversed) ================================================ FILE: scripts/perses-benchmark/tyk2/espaloma-0.2.2/template.yaml ================================================ # Path to protein file protein_pdb: null # Path to ligand SDF file ligand_file: null # Indices of old and new ligands within SDF file old_ligand_index: null new_ligand_index: null # # Force fields # # OpenMM ffxml force field files installed via the openmm-forcefields package # for biopolymers and solvents. # Note that small molecule force field files should NOT be included here. forcefield_files: - amber/ff14SB.xml # ff14SB protein force field - amber/tip3p_standard.xml # TIP3P and recommended monovalent ion parameters - amber/tip3p_HFE_multivalent.xml # for divalent ions - amber/phosaa10.xml # HANDLES THE TPO # Small molecule force field # Options include anything allowed by the openmmforcefields SystemGenerator # e.g. 
one of ['openff-2.0.0', 'gaff-2.11'] small_molecule_forcefield: espaloma-0.2.2 # # Simulation conditions # # Simulation setup options solvent_padding: 9.0 # angstroms # Use geometry-derived mapping use_given_geometries: true given_geometries_tolerance: 0.2 # angstroms # Atom mapping specification atom_expression: - IntType bond_expession: - DefaultBonds # Multi-state sampling scheme # One of ['repex', 'nonequilibrium', 'sams'] fe_type: repex # Checkpoint interval checkpoint_interval: 50 # number of iterations # Number of equilibration iterations n_equilibration_iterations: 0 # Number of iterations to run n_cycles: 5000 # Number of alchemical intermediate states to use n_states: 12 pressure: 1.0 # atmospheres temperature: 300.0 # kelvin timestep: 4.0 # femtoseconds # remove_constraints: false # Number of integration steps per iteration n_steps_per_move_application: 250 # Location for storing trajectories trajectory_directory: null # Prefix for trajectory files (project-specific name) trajectory_prefix: out # Atoms to store in NetCDF files (MDTraj selection syntax) atom_selection: not water # Calculation phases to run # Permitted phases: ['complex', 'solvent', 'vacuum'] phases: - complex - solvent ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/LSF-job-template.sh ================================================ #!/bin/bash #BSUB -P "tyk2-benchmark" #BSUB -J "perses-benchmark-[1-24]" #BSUB -n 1 #BSUB -R rusage[mem=8] #BSUB -R span[hosts=1] #BSUB -q gpuqueue #BSUB -sp 1 # low priority. default is 12, max is 25 #BSUB -gpu num=1:j_exclusive=yes:mode=shared #BSUB -W 24:00 #BSUB -o out_%J_%I.stdout #BSUB -eo out_%J_%I.stderr #BSUB -L /bin/bash source ~/.bashrc OPENMM_CPU_THREADS=1 echo "changing directory to ${LS_SUBCWD}" cd $LS_SUBCWD conda activate espaloma-perses # Report node in use hostname # Report CUDA info env | sort | grep 'CUDA' # launching a benchmark pair (target, edge) per job (0-based, thus subtract 1) python run_benchmarks.py --target tyk2 --edge $(( $LSB_JOBINDEX - 1 )) ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/README.md ================================================ # Perses benchmarks This subdirectory exposes a CLI tool for running automated benchmarks from [OpenFF's protein ligand benchmark dataset](https://github.com/openforcefield/protein-ligand-benchmark) using perses. ## Running all edges A script to run all transformations in an LSF batch scheduler is provided, but it will likely need to be modified for your batch queue system: ```bash bsub < LSF-job-template.sh ``` ## Running single edges Assuming you have a clone of the perses code repository and are standing in the `benchmarks` subdirectory (where this file lives), the benchmarks can be run using the following command syntax: ```bash python run_benchmarks.py --target [protein-name] --edge [edge-index] ``` For example, to run the seventh edge (zero-based, according to the [plbenchmark data](https://github.com/openforcefield/protein-ligand-benchmark)) for the `tyk2` protein, you would run: ```bash # Set up and run edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` Should the calculation for an edge fail, you can simply re-run the same command line and the calculation will resume: ```bash # Resume failed edge 6 python run_benchmarks.py --target tyk2 --edge 6 ``` For more information on how to use the tool, you can run `python run_benchmarks.py -h`.
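Each edge can also be run in the reverse direction (swapping which ligand is treated as "old" and which as "new"), which is helpful for consistency checks; as implemented in `run_benchmarks.py`, the `--reversed` flag writes results to a separate `out_*_reversed` directory, and those runs can later be analyzed with `benchmark_analysis.py --reversed`:

```bash
# Run edge 6 with the ligand order swapped (output goes to out_*_reversed)
python run_benchmarks.py --target tyk2 --edge 6 --reversed
```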
## Analyzing benchmarks To analyze the simulations, a script called `benchmark_analysis.py` is used as follows: ```bash python benchmark_analysis.py --target [protein-name] ``` For example, for tyk2 results: ```bash python benchmark_analysis.py --target tyk2 ``` This will generate an output CSV file for [`arsenic`](https://github.com/openforcefield/arsenic) and corresponding absolute and relative free energy plots as PDF files, produced according to best practices. For more information on how to use the CLI analysis tool, run `python benchmark_analysis.py -h`. ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/benchmark_analysis.py ================================================ """ Script to perform analysis of perses simulations executed using the run_benchmarks.py script. Intended to be used on systems from https://github.com/openforcefield/protein-ligand-benchmark """ import argparse import glob import itertools import re import warnings import numpy as np import urllib.request import yaml from openmmtools.constants import kB from perses.analysis.load_simulations import Simulation from openmm import unit from openff.arsenic import plotting, wrangle # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" # Helper functions def get_simdir_list(base_dir='.', is_reversed=False): """ Get list of directories to extract simulation data from. Parameters ---------- base_dir: str, optional, default='.' Base directory where to search for simulation results. Defaults to the current directory. is_reversed: bool, optional, default=False Whether to consider the reversed simulations or not. Meant for testing purposes. Returns ------- dir_list: list List of directory paths for simulation results. """ # Load all expected simulations from directories out_dirs = ['/'.join(filepath.split('/')[:-1]) for filepath in glob.glob(f'{base_dir}/out*/*complex.nc')] reg = re.compile(r'out_[0-9]+_[0-9]+_reversed') # regular expression to deal with reversed directories if is_reversed: # Choose only reversed directories out_dirs = list(filter(reg.search, out_dirs)) else: # Filter out reversed directories out_dirs = list(itertools.filterfalse(reg.search, out_dirs)) return out_dirs def get_simulations_data(simulation_dirs): """Generates a list of simulation data objects given the simulation directory paths.""" simulations = [] for out_dir in simulation_dirs: # Load complete or fully working simulations # TODO: Try getting better exceptions from openmmtools -- use non-generic exceptions try: simulation = Simulation(out_dir) simulations.append(simulation) except Exception: warnings.warn(f"Edge in {out_dir} could not be loaded. Check simulation output is complete.") return simulations def to_arsenic_csv(experimental_data: dict, simulation_data: list, out_csv: str = 'out_benchmark.csv'): """ Generates a csv file to be used with openff-arsenic. Energy units are in kcal/mol. .. warning:: To be deprecated once the arsenic object model is improved. Parameters ---------- experimental_data: dict Python nested dictionary with experimental data in micromolar or nanomolar units.
Example of entry: {'lig_ejm_31': {'measurement': {'comment': 'Table 4, entry 31', 'doi': '10.1016/j.ejmech.2013.03.070', 'error': -1, 'type': 'ki', 'unit': 'uM', 'value': 0.096}, 'name': 'lig_ejm_31', 'smiles': '[H]c1c(c(c(c(c1[H])Cl)C(=O)N([H])c2c(c(nc(c2[H])N([H])C(=O)C([H])([H])[H])[H])[H])Cl)[H]'} simulation_data: list or iterable Python iterable object with perses Simulation objects as entries. out_csv: str Path to output csv file to be generated. """ # Ligand information ligands_names = list(experimental_data.keys()) lig_id_to_name = dict(enumerate(ligands_names)) kBT = kB * 300 * unit.kelvin # useful when converting to kcal/mol # Write csv file with open(out_csv, 'w') as csv_file: # Experimental block # print header for block csv_file.write("# Experimental block\n") csv_file.write("# Ligand, expt_DG, expt_dDG\n") # Extract ligand name, expt_DG and expt_dDG from ligands dictionary for ligand_name, ligand_data in experimental_data.items(): # TODO: Handle multiple measurement types unit_symbol = ligand_data['measurement']['unit'] measurement_value = ligand_data['measurement']['value'] measurement_error = ligand_data['measurement']['error'] # Unit conversion # TODO: Let's persuade PLBenchmarks to use pint units unit_conversions = { 'M' : 1.0, 'mM' : 1e-3, 'uM' : 1e-6, 'nM' : 1e-9, 'pM' : 1e-12, 'fM' : 1e-15 } if unit_symbol not in unit_conversions: raise ValueError(f'Unknown units "{unit_symbol}"') value_to_molar = unit_conversions[unit_symbol] # Handle unknown errors # TODO: We should be able to ensure that all entries have more reasonable errors. if measurement_error == -1: # TODO: For now, we use a relative_error from the Tyk2 system 10.1016/j.ejmech.2013.03.070 relative_error = 0.3 else: relative_error = measurement_error / measurement_value # Convert to free energies expt_DG = kBT.value_in_unit(unit.kilocalorie_per_mole) * np.log(measurement_value * value_to_molar) expt_dDG = kBT.value_in_unit(unit.kilocalorie_per_mole) * relative_error csv_file.write(f"{ligand_name}, {expt_DG}, {expt_dDG}\n") # Calculated block # print header for block csv_file.write("# Calculated block\n") csv_file.write("# Ligand1,Ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional)\n") # Loop through simulations, extract ligand1 and ligand2 indices, convert to names, create string with # ligand1, ligand2, calc_DDG, calc_dDDG(MBAR), calc_dDDG(additional) # write string in csv file for simulation in simulation_data: out_dir = simulation.directory.split('/')[-1] # getting integer indices ligand1_id, ligand2_id = int(out_dir.split('_')[-1]), int(out_dir.split('_')[-2]) # CHECK ORDER!
# getting names of ligands ligand1, ligand2 = lig_id_to_name[ligand1_id], lig_id_to_name[ligand2_id] # getting calc_DDG in kcal/mol calc_DDG = simulation.bindingdg.value_in_unit(unit.kilocalorie_per_mole) # getting calc_dDDG in kcal/mol calc_dDDG = simulation.bindingddg.value_in_unit(unit.kilocalorie_per_mole) csv_file.write( f"{ligand1}, {ligand2}, {calc_DDG}, {calc_dDDG}, 0.0\n") # hardcoding additional error as 0.0 # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with urllib.request.urlopen(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks analysis.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Analyze reversed edge simulations. Helpful for testing/consistency checks." ) args = arg_parser.parse_args() target = args.target # Download experimental data # TODO: This part should be done using plbenchmarks API - once there is a conda pkg # TODO: Let's cache this data when we set up the initial simulations in case it changes in between setting up and running the calculations and analysis. # TODO: Let's also be sure to use a specific release tag rather than 'master' target_dir = targets_dict[target]['dir'] ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with urllib.request.urlopen(ligands_url) as response: yaml_contents = response.read() print(yaml_contents) ligands_dict = yaml.safe_load(yaml_contents) # DEBUG print('') print(yaml.dump(ligands_dict)) # Get paths for simulation output directories out_dirs = get_simdir_list(is_reversed=args.reversed) # Generate list with simulation objects simulations = get_simulations_data(out_dirs) # Generate csv file csv_path = f'./{target}_arsenic.csv' to_arsenic_csv(ligands_dict, simulations, out_csv=csv_path) # TODO: Separate plotting into a different file # Make plots and store fe = wrangle.FEMap(csv_path) # Relative plot plotting.plot_DDGs(fe.graph, target_name=f'{target}', title=f'Relative binding energies - {target}', figsize=5, filename='./plot_relative.pdf' ) # Absolute plot, with experimental data shifted to correct mean experimental_mean_dg = np.asarray([node[1]["exp_DG"] for node in fe.graph.nodes(data=True)]).mean() plotting.plot_DGs(fe.graph, target_name=f'{target}', title=f'Absolute binding energies - {target}', figsize=5, filename='./plot_absolute.pdf', shift=experimental_mean_dg, ) ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/run_benchmarks.py ================================================ #!/usr/bin/env python """ CLI utility to automatically run benchmarks using data from the open force field protein-ligand benchmark at https://github.com/openforcefield/protein-ligand-benchmark It requires an internet connection to function properly, since it fetches data from the repository mentioned above. """ # TODO: Use plbenchmarks when conda package is available.
import argparse import logging import os import yaml from perses.app.setup_relative_calculation import run from perses.utils.url_utils import retrieve_file_url from perses.utils.url_utils import fetch_url_contents # Setting logging level config LOGLEVEL = os.environ.get("LOGLEVEL", "DEBUG").upper() logging.basicConfig( format='%(asctime)s %(levelname)-8s %(message)s', level=LOGLEVEL, datefmt='%Y-%m-%d %H:%M:%S') _logger = logging.getLogger() _logger.setLevel(LOGLEVEL) # global variables base_repo_url = "https://github.com/openforcefield/protein-ligand-benchmark" def concatenate_files(input_files, output_file): """ Concatenate files given in input_files iterator into output_file. """ with open(output_file, 'w') as outfile: for filename in input_files: with open(filename) as infile: for line in infile: outfile.write(line) def run_relative_perturbation(lig_a_idx, lig_b_idx, reverse=False, tidy=True): """ Perform relative free energy simulation using perses CLI. Parameters ---------- lig_a_idx : int Index for first ligand (ligand A) lig_b_idx : int Index for second ligand (ligand B) reverse: bool Run the edge in reverse direction. Swaps the ligands. tidy : bool, optional remove auto-generated yaml files. Expects the target/protein pdb file in the same directory to be called 'target.pdb', and ligands file to be called 'ligands.sdf'. """ _logger.info(f'Starting relative calculation of ligand {lig_a_idx} to {lig_b_idx}') trajectory_directory = f'out_{lig_a_idx}_{lig_b_idx}' new_yaml = f'relative_{lig_a_idx}_{lig_b_idx}.yaml' # read base template yaml file # TODO: template.yaml file is configured for Tyk2, check if the same options work for others. with open(f'template.yaml', "r") as yaml_file: options = yaml.load(yaml_file, Loader=yaml.FullLoader) # TODO: add a step to perform some minimization - should help with NaNs # generate yaml file from template options['protein_pdb'] = 'target.pdb' options['ligand_file'] = 'ligands.sdf' if reverse: # Do the other direction of ligands options['old_ligand_index'] = lig_b_idx options['new_ligand_index'] = lig_a_idx # mark the output directory with reversed trajectory_directory = f'{trajectory_directory}_reversed' # mark new yaml file with reversed temp_path = new_yaml.split('.') new_yaml = f'{temp_path[0]}_reversed.{temp_path[1]}' else: options['old_ligand_index'] = lig_a_idx options['new_ligand_index'] = lig_b_idx options['trajectory_directory'] = f'{trajectory_directory}' with open(new_yaml, 'w') as outfile: yaml.dump(options, outfile) # run the simulation - using API point to respect logging level run(new_yaml) _logger.info(f'Relative calculation of ligand {lig_a_idx} to {lig_b_idx} complete') if tidy: os.remove(new_yaml) # Defining command line arguments # fetching targets from github repo # TODO: This part should be done using plbenchmarks API - once there is a conda pkg targets_url = f"{base_repo_url}/raw/master/data/targets.yml" with fetch_url_contents(targets_url) as response: targets_dict = yaml.safe_load(response.read()) # get the possible choices from targets yaml file target_choices = targets_dict.keys() arg_parser = argparse.ArgumentParser(description='CLI tool for running perses protein-ligand benchmarks.') arg_parser.add_argument( "--target", type=str, help="Target biomolecule, use openff's plbenchmark names.", choices=target_choices, required=True ) arg_parser.add_argument( "--edge", type=int, help="Edge index (0-based) according to edges yaml file in dataset. Ex. 
--edge 5 (for sixth edge)", required=True ) arg_parser.add_argument( "--reversed", action='store_true', help="Whether to run the edge in reverse direction. Helpful for consistency checks." ) args = arg_parser.parse_args() target = args.target is_reversed = args.reversed # Fetch protein pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg target_dir = targets_dict[target]['dir'] pdb_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/protein.pdb" pdb_file = retrieve_file_url(pdb_url) # Fetch cofactors crystalwater pdb file # TODO: This part should be done using plbenchmarks API - once there is a conda pkg cofactors_url = f"{base_repo_url}/raw/master/data/{target_dir}/01_protein/crd/cofactors_crystalwater.pdb" cofactors_file = retrieve_file_url(cofactors_url) # Concatenate protein with cofactors pdbs concatenate_files((pdb_file, cofactors_file), 'target.pdb') # Fetch ligands sdf files and concatenate them into one # TODO: This part should be done using plbenchmarks API - once there is a conda pkg ligands_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/ligands.yml" with fetch_url_contents(ligands_url) as response: ligands_dict = yaml.safe_load(response.read()) ligand_files = [] for ligand in ligands_dict.keys(): ligand_url = f"{base_repo_url}/raw/master/data/{target_dir}/02_ligands/{ligand}/crd/{ligand}.sdf" ligand_file = retrieve_file_url(ligand_url) ligand_files.append(ligand_file) # concatenate sdfs concatenate_files(ligand_files, 'ligands.sdf') # run simulation # fetch edges information # TODO: This part should be done using plbenchmarks API - once there is a conda pkg edges_url = f"{base_repo_url}/raw/master/data/{target_dir}/00_data/edges.yml" with fetch_url_contents(edges_url) as response: edges_dict = yaml.safe_load(response.read()) edges_list = list(edges_dict.values()) # subscriptable edges object - note dicts are ordered for py>=3.7 # edge list to access by index edge_index = args.edge # read from cli arguments edge = edges_list[edge_index] ligand_a_name = edge['ligand_a'] ligand_b_name = edge['ligand_b'] # ligands list to get indices -- preserving same order as upstream yaml file ligands_list = list(ligands_dict.keys()) lig_a_index = ligands_list.index(ligand_a_name) lig_b_index = ligands_list.index(ligand_b_name) # Perform the simulation run_relative_perturbation(lig_a_index, lig_b_index, reverse=is_reversed) ================================================ FILE: scripts/perses-benchmark/tyk2/openff-1.2.0/template.yaml ================================================ # Path to protein file protein_pdb: null # Path to ligand SDF file ligand_file: null # Indices of old and new ligands within SDF file old_ligand_index: null new_ligand_index: null # # Force fields # # OpenMM ffxml force field files installed via the openmm-forcefields package # for biopolymers and solvents. # Note that small molecule force field files should NOT be included here. forcefield_files: - amber/ff14SB.xml # ff14SB protein force field - amber/tip3p_standard.xml # TIP3P and recommended monovalent ion parameters - amber/tip3p_HFE_multivalent.xml # for divalent ions - amber/phosaa10.xml # HANDLES THE TPO # Small molecule force field # Options include anything allowed by the openmmforcefields SystemGenerator # e.g. 
one of ['openff-2.0.0', 'gaff-2.11'] small_molecule_forcefield: openff-1.2.0 # # Simulation conditions # # Simulation setup options solvent_padding: 9.0 # angstroms # Use geometry-derived mapping use_given_geometries: true given_geometries_tolerance: 0.2 # angstroms # Atom mapping specification atom_expression: - IntType bond_expession: - DefaultBonds # Multi-state sampling scheme # One of ['repex', 'nonequilibrium', 'sams'] fe_type: repex # Checkpoint interval checkpoint_interval: 50 # number of iterations # Number of equilibration iterations n_equilibration_iterations: 0 # Number of iterations to run n_cycles: 5000 # Number of alchemical intermediate states to use n_states: 12 pressure: 1.0 # atmospheres temperature: 300.0 # kelvin timestep: 4.0 # femtoseconds # remove_constraints: false # Number of integration steps per iteration n_steps_per_move_application: 250 # Location for storing trajectories trajectory_directory: null # Prefix for trajectory files (project-specific name) trajectory_prefix: out # Atoms to store in NetCDF files (MDTraj selection syntax) atom_selection: not water # Calculation phases to run # Permitted phases: ['complex', 'solvent', 'vacuum'] phases: - complex - solvent ================================================ FILE: setup.cfg ================================================ # Helper file to handle all configs [coverage:run] # .coveragerc to control coverage.py and pytest-cov omit = # Omit the tests */tests/* # Omit generated versioneer espaloma/_version.py [yapf] # YAPF, in .style.yapf files this shows up as "[style]" header COLUMN_LIMIT = 119 INDENT_WIDTH = 4 USE_TABS = False [flake8] # Flake8, PyFlakes, etc max-line-length = 119 [versioneer] # Automatic version numbering scheme VCS = git style = pep440 versionfile_source = espaloma/_version.py versionfile_build = espaloma/_version.py tag_prefix = '' [aliases] test = pytest ================================================ FILE: setup.py ================================================ """ espaloma Extensible Surrogate Potential of Ab initio Learned and Optimized by Message-passing Algorithm """ import sys from setuptools import find_packages, setup import versioneer short_description = __doc__.split("\n") # from https://github.com/pytest-dev/pytest-runner#conditional-requirement needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv) pytest_runner = ['pytest-runner'] if needs_pytest else [] try: with open("README.md", "r") as handle: long_description = handle.read() except Exception: long_description = "\n".join(short_description[2:]) setup( # Self-descriptive entries which should always be present name='espaloma', author='Yuanqing Wang @ choderalab // MSKCC', author_email='wangyq@wangyq.net', description=short_description[0], long_description=long_description, long_description_content_type="text/markdown", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), license='MIT', # Which Python importable modules should be included when your package is installed # Handled automatically by setuptools.
Use 'exclude' to prevent some specific # subpackage(s) from being added, if needed packages=find_packages(), # Optional include package data to ship with your package # Customize MANIFEST.in if the general case does not suit your needs # Comment out this line to prevent the files from being packaged with your software include_package_data=True, # Allows `setup.py test` to work correctly with pytest setup_requires=[] + pytest_runner, # Additional entries you may want simply uncomment the lines you want and fill in the data # url='http://www.my_package.com', # Website # install_requires=[], # Required packages, pulls from pip if needed; do not use for Conda deployment # platforms=['Linux', # 'Mac OS-X', # 'Unix', # 'Windows'], # Valid platforms your code works on, adjust to your flavor # python_requires=">=3.5", # Python version restrictions # Manual control if final package is compressible or not, set False to prevent the .egg from being made # zip_safe=False, ) ================================================ FILE: versioneer.py ================================================ # Version: 0.29 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! * https://github.com/python-versioneer/python-versioneer * Brian Warner * License: Public Domain (Unlicense) * Compatible with: Python 3.7, 3.8, 3.9, 3.10, 3.11 and pypy3 * [![Latest Version][pypi-image]][pypi-url] * [![Build Status][travis-image]][travis-url] This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. ## Quick Install Versioneer provides two installation modes. The "classic" vendored mode installs a copy of versioneer into your repository. The experimental build-time dependency mode is intended to allow you to skip this step and simplify the process of upgrading. 
### Vendored mode * `pip install versioneer` to somewhere in your $PATH * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is available, so you can also use `conda install -c conda-forge versioneer` * add a `[tool.versioneer]` section to your `pyproject.toml` or a `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) * Note that you will need to add `tomli; python_version < "3.11"` to your build-time dependencies if you use `pyproject.toml` * run `versioneer install --vendor` in your source tree, commit the results * verify version information with `python setup.py version` ### Build-time dependency mode * `pip install versioneer` to somewhere in your $PATH * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is available, so you can also use `conda install -c conda-forge versioneer` * add a `[tool.versioneer]` section to your `pyproject.toml` or a `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) * add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) to the `requires` key of the `build-system` table in `pyproject.toml`: ```toml [build-system] requires = ["setuptools", "versioneer[toml]"] build-backend = "setuptools.build_meta" ``` * run `versioneer install --no-vendor` in your source tree, commit the results * verify version information with `python setup.py version` ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes). The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. 
`_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation See [INSTALL.md](./INSTALL.md) for detailed installation instructions. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the commit date in ISO 8601 format. This will be None if the date is not available. * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. 
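To make these flavors concrete, here is a minimal sketch of querying them from `setup.py` (the returned values are illustrative, matching the example above):

```python
# Ask the vendored versioneer for all version flavors
# (run from the project root, next to setup.py).
import versioneer

info = versioneer.get_versions()
# For a dirty checkout two revisions past the "0.11" tag, this might be:
# {'version': '0.11+2.g1076c97.dirty',
#  'full-revisionid': '1076c978a8d3cfc70f408fe5974aa6c092c949ac',
#  'dirty': True,
#  'error': None,
#  'date': None}
print(info['version'])
```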
For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See [details.md](details.md) in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Known Limitations Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github [issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects Versioneer has limited support for source trees in which `setup.py` is not in the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are two common reasons why `setup.py` might not be in the root: * Source trees which contain multiple subprojects, such as [Buildbot](https://github.com/buildbot/buildbot), which contains both "master" and "slave" subprojects, each with their own `setup.py`, `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs and implementation details which frequently cause `pip install .` from a subproject directory to fail to find a correct version string (so it usually defaults to `0+unknown`). `pip install --editable .` should work correctly. `setup.py install` might work too. Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. [Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in [PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve pip to let Versioneer work correctly. Versioneer-0.16 and earlier only looked for a `.git` directory next to the `setup.cfg`, so subprojects were completely unsupported with those releases. ### Editable installs with setuptools <= 18.5 `setup.py develop` and `pip install --editable .` allow you to install a project into a virtualenv once, then continue editing the source code (and test) without re-installing after every change. "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a convenient way to specify executable scripts that should be installed along with the python package. These both work as expected when using modern setuptools. When using setuptools-18.5 or earlier, however, certain operations will cause `pkg_resources.DistributionNotFound` errors when running the entrypoint script, which must be resolved by re-installing the package. This happens when the install happens with one version, then the egg_info data is regenerated while a different version is checked out.
Many setup.py commands cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. [Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg` and `pyproject.toml`, if necessary, to include any new configuration settings indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. * re-run `versioneer install --[no-]vendor` in your source tree, to replace `SRC/_version.py` * commit any changed files ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## Similar projects * [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time dependency * [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of versioneer * [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools plugin ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the "Unlicense", as described in https://unlicense.org/. [pypi-image]: https://img.shields.io/pypi/v/versioneer.svg [pypi-url]: https://pypi.python.org/pypi/versioneer/ [travis-image]: https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg [travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ # pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring # pylint:disable=missing-class-docstring,too-many-branches,too-many-statements # pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error # pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with # pylint:disable=attribute-defined-outside-init,too-many-arguments import configparser import errno import json import os import re import subprocess import sys from pathlib import Path from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Union from typing import NoReturn import functools have_tomllib = True if sys.version_info >= (3, 11): import tomllib else: try: import tomli as tomllib except ImportError: have_tomllib = False class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str versionfile_source: str versionfile_build: Optional[str] parentdir_prefix: Optional[str] verbose: Optional[bool] def get_root() -> str: """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . 
""" root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") if not ( os.path.exists(setup_py) or os.path.exists(pyproject_toml) or os.path.exists(versioneer_py) ): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") pyproject_toml = os.path.join(root, "pyproject.toml") versioneer_py = os.path.join(root, "versioneer.py") if not ( os.path.exists(setup_py) or os.path.exists(pyproject_toml) or os.path.exists(versioneer_py) ): err = ("Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND').") raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. my_path = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): print("Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(my_path), versioneer_py)) except NameError: pass return root def get_config_from_root(root: str) -> VersioneerConfig: """Read the project setup.cfg file to determine Versioneer config.""" # This might raise OSError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . root_pth = Path(root) pyproject_toml = root_pth / "pyproject.toml" setup_cfg = root_pth / "setup.cfg" section: Union[Dict[str, Any], configparser.SectionProxy, None] = None if pyproject_toml.exists() and have_tomllib: try: with open(pyproject_toml, 'rb') as fobj: pp = tomllib.load(fobj) section = pp['tool']['versioneer'] except (tomllib.TOMLDecodeError, KeyError) as e: print(f"Failed to load config from {pyproject_toml}: {e}") print("Try to load it from setup.cfg") if not section: parser = configparser.ConfigParser() with open(setup_cfg) as cfg_file: parser.read_file(cfg_file) parser.get("versioneer", "VCS") # raise error if missing section = parser["versioneer"] # `cast`` really shouldn't be used, but its simplest for the # common VersioneerConfig users at the moment. 
class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


# these dictionaries contain VCS-specific tools
LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS: Dict[str, Dict[str, Callable]] = {}


def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(f: Callable) -> Callable:
        """Store f in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate


def run_command(
    commands: List[str],
    args: List[str],
    cwd: Optional[str] = None,
    verbose: bool = False,
    hide_stderr: bool = False,
    env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
    """Call the given command(s)."""
    assert isinstance(commands, list)
    process = None

    popen_kwargs: Dict[str, Any] = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs["startupinfo"] = startupinfo

    for command in commands:
        try:
            dispcmd = str([command] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=(subprocess.PIPE if hide_stderr
                                               else None), **popen_kwargs)
            break
        except OSError as e:
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode


LONG_VERSION_PY['git'] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by github's download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain.
# Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer

"""Git implementation of _version.py."""

import errno
import os
import re
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple
import functools


def get_keywords() -> Dict[str, str]:
    """Get the keywords needed to look up the version information."""
    # these strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own. _version.py will just call
    # get_keywords().
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str parentdir_prefix: str versionfile_source: str verbose: bool def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY: Dict[str, str] = {} HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command( commands: List[str], args: List[str], cwd: Optional[str] = None, verbose: bool = False, hide_stderr: bool = False, env: Optional[Dict[str, str]] = None, ) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW popen_kwargs["startupinfo"] = startupinfo for command in commands: try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break except OSError as e: if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) return None, process.returncode return stdout, process.returncode def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. 
It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). 
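    Note: as implemented below, None (rather than -1) is returned when no
    post-release segment is present, e.g. "1.2.post3" -> ("1.2", 3) and
    "1.2" -> ("1.2", None).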
""" vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%%d" %% (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
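    # Illustrative (made-up) values after git-archive has expanded the
    # export-subst keywords in _version.py:
    #     git_refnames = " (HEAD -> main, tag: v0.3.2)"
    #     git_full = "0123456789abcdef0123456789abcdef01234567"
    #     git_date = "2023-01-01 00:00:00 -0500"
    # In an unexpanded checkout the literal "$Format:...$" strings remain,
    # which git_versions_from_keywords() detects and rejects.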
keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. 
This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? 
pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def do_vcs_install(versionfile_source: str, ipy: Optional[str]) -> None: """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py for export-subst keyword substitution. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] files = [versionfile_source] if ipy: files.append(ipy) if "VERSIONEER_PEP518" not in globals(): try: my_path = __file__ if my_path.endswith((".pyc", ".pyo")): my_path = os.path.splitext(my_path)[0] + ".py" versioneer_file = os.path.relpath(my_path) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: with open(".gitattributes", "r") as fobj: for line in fobj: if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True break except OSError: pass if not present: with open(".gitattributes", "a+") as fobj: fobj.write(f"{versionfile_source} export-subst\n") files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") SHORT_VERSION_PY = """ # This file was generated by 'versioneer.py' (0.29) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
import json version_json = ''' %s ''' # END VERSION_JSON def get_versions(): return json.loads(version_json) """ def versions_from_file(filename: str) -> Dict[str, Any]: """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() except OSError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) def write_to_version_file(filename: str, versions: Dict[str, Any]) -> None: """Write the given version number to the given _version.py file.""" contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). """ vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 
0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" def get_versions(verbose: bool = False) -> Dict[str, Any]: """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. """ if "versioneer" in sys.modules: # see the discussion in cmdclass.py:get_cmdclass() del sys.modules["versioneer"] root = get_root() cfg = get_config_from_root(root) assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or bool(cfg.verbose) # `bool()` used to avoid `None` assert cfg.versionfile_source is not None, \ "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) # extract version from first of: _version.py, VCS command (e.g. 'git # describe'), parentdir. This is meant to work for developers using a # source checkout, for users of a tarball created by 'setup.py sdist', # and for users of a tarball/zipball created by 'git archive' or github's # download-from-tag feature or the equivalent in other VCSes. 
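    # The four fall-through attempts below, in order:
    #   1. expanded git-archive keywords baked into _version.py
    #   2. a static _version.py written earlier (e.g. by 'setup.py sdist')
    #   3. asking the VCS directly ('git describe' via pieces_from_vcs)
    #   4. the parent directory name, matched against parentdir_prefix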
    get_keywords_f = handlers.get("get_keywords")
    from_keywords_f = handlers.get("keywords")
    if get_keywords_f and from_keywords_f:
        try:
            keywords = get_keywords_f(versionfile_abs)
            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
            if verbose:
                print("got version from expanded keyword %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        ver = versions_from_file(versionfile_abs)
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, ver))
        return ver
    except NotThisMethod:
        pass

    from_vcs_f = handlers.get("pieces_from_vcs")
    if from_vcs_f:
        try:
            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
            ver = render(pieces, cfg.style)
            if verbose:
                print("got version from VCS %s" % ver)
            return ver
        except NotThisMethod:
            pass

    try:
        if cfg.parentdir_prefix:
            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
            if verbose:
                print("got version from parentdir %s" % ver)
            return ver
    except NotThisMethod:
        pass

    if verbose:
        print("unable to compute version")

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None, "error": "unable to compute version",
            "date": None}


def get_version() -> str:
    """Get the short version string for this project."""
    return get_versions()["version"]


def get_cmdclass(cmdclass: Optional[Dict[str, Any]] = None):
    """Get the custom setuptools subclasses used by Versioneer.

    If the package uses a different cmdclass (e.g. one from numpy), it
    should be provided as an argument.
    """
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to its pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/python-versioneer/python-versioneer/issues/52

    cmds = {} if cmdclass is None else cmdclass.copy()

    # we add "version" to setuptools
    from setuptools import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options: List[Tuple[str, str, str]] = []
        boolean_options: List[str] = []

        def initialize_options(self) -> None:
            pass

        def finalize_options(self) -> None:
            pass

        def run(self) -> None:
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            if vers["error"]:
                print(" error: %s" % vers["error"])
    cmds["version"] = cmd_version

    # we override "build_py" in setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?
# pip install -e . and setuptool/editable_wheel will invoke build_py # but the build_py command is not expected to copy any files. # we override different "build_py" commands for both environments if 'build_py' in cmds: _build_py: Any = cmds['build_py'] else: from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) if getattr(self, "editable_mode", False): # During editable installs `.py` and data files are # not copied to build_lib return # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if 'build_ext' in cmds: _build_ext: Any = cmds['build_ext'] else: from setuptools.command.build_ext import build_ext as _build_ext class cmd_build_ext(_build_ext): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_ext.run(self) if self.inplace: # build_ext --inplace will only build extensions in # build/lib<..> dir with no _version.py to write to. # As in place builds will already have a _version.py # in the module dir, we do not need to write one. return # now locate _version.py in the new build/ directory and replace # it with an updated value if not cfg.versionfile_build: return target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) if not os.path.exists(target_versionfile): print(f"Warning: {target_versionfile} does not exist, skipping " "version update. This can happen if you are running build_ext " "without first running build_py.") return print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_ext"] = cmd_build_ext if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe # type: ignore # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION # "product_version": versioneer.get_version(), # ... class cmd_build_exe(_build_exe): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] if 'py2exe' in sys.modules: # py2exe enabled? 
try: from py2exe.setuptools_buildexe import py2exe as _py2exe # type: ignore except ImportError: from py2exe.distutils_buildexe import py2exe as _py2exe # type: ignore class cmd_py2exe(_py2exe): def run(self) -> None: root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _py2exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) cmds["py2exe"] = cmd_py2exe # sdist farms its file list building out to egg_info if 'egg_info' in cmds: _egg_info: Any = cmds['egg_info'] else: from setuptools.command.egg_info import egg_info as _egg_info class cmd_egg_info(_egg_info): def find_sources(self) -> None: # egg_info.find_sources builds the manifest list and writes it # in one shot super().find_sources() # Modify the filelist and normalize it root = get_root() cfg = get_config_from_root(root) self.filelist.append('versioneer.py') if cfg.versionfile_source: # There are rare cases where versionfile_source might not be # included by default, so we must be explicit self.filelist.append(cfg.versionfile_source) self.filelist.sort() self.filelist.remove_duplicates() # The write method is hidden in the manifest_maker instance that # generated the filelist and was thrown away # We will instead replicate their final normalization (to unicode, # and POSIX-style paths) from setuptools import unicode_utils normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') for f in self.filelist.files] manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') with open(manifest_filename, 'w') as fobj: fobj.write('\n'.join(normalized)) cmds['egg_info'] = cmd_egg_info # we override different "sdist" commands for both environments if 'sdist' in cmds: _sdist: Any = cmds['sdist'] else: from setuptools.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self) -> None: versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir: str, files: List[str]) -> None: root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. You need a section like: [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = parentdir_prefix = myproject- You will also need to edit your setup.py to use the results: import versioneer setup(version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ...) Please read the docstring in ./versioneer.py for configuration instructions, edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 
""" SAMPLE_CONFIG = """ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. [versioneer] #VCS = git #style = pep440 #versionfile_source = #versionfile_build = #tag_prefix = #parentdir_prefix = """ OLD_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ INIT_PY_SNIPPET = """ from . import {0} __version__ = {0}.get_versions()['version'] """ def do_setup() -> int: """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (OSError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) return 1 print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write(LONG % {"DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, }) ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") maybe_ipy: Optional[str] = ipy if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() except OSError: old = "" module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] snippet = INIT_PY_SNIPPET.format(module) if OLD_SNIPPET in old: print(" replacing boilerplate in %s" % ipy) with open(ipy, "w") as f: f.write(old.replace(OLD_SNIPPET, snippet)) elif snippet not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: f.write(snippet) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) maybe_ipy = None # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. do_vcs_install(cfg.versionfile_source, maybe_ipy) return 0 def scan_setup_py() -> int: """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False errors = 0 with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line: found.add("cmdclass") if "versioneer.get_version()" in line: found.add("get_version") if "versioneer.VCS" in line: setters = True if "versioneer.versionfile_source" in line: setters = True if len(found) != 3: print("") print("Your setup.py appears to be missing some important items") print("(but I might be wrong). Please make sure it has something") print("roughly like the following:") print("") print(" import versioneer") print(" setup( version=versioneer.get_version(),") print(" cmdclass=versioneer.get_cmdclass(), ...)") print("") errors += 1 if setters: print("You should remove lines like 'versioneer.VCS = ' and") print("'versioneer.versionfile_source = ' . This configuration") print("now lives in setup.cfg, and should be removed from setup.py") print("") errors += 1 return errors def setup_command() -> NoReturn: """Set up Versioneer and exit with appropriate error code.""" errors = do_setup() errors += scan_setup_py() sys.exit(1 if errors else 0) if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": setup_command()